An overhaul of VEX's floating point handling, to facilitate correct

author Julian Seward <jseward@acm.org>

Fri, 3 Feb 2006 16:08:03 +0000 (16:08 +0000)

committer Julian Seward <jseward@acm.org>

Fri, 3 Feb 2006 16:08:03 +0000 (16:08 +0000)
author Julian Seward <jseward@acm.org>
Fri, 3 Feb 2006 16:08:03 +0000 (16:08 +0000)
committer Julian Seward <jseward@acm.org>
Fri, 3 Feb 2006 16:08:03 +0000 (16:08 +0000)
diff --git a/VEX/priv/guest-amd64/toIR.c b/VEX/priv/guest-amd64/toIR.c

index ab3034c9eb27281228261ad785e0501331599a9e..1573f4e415362eed7b4848a5d01bc6079ac080b2 100644 (file)
--- a/VEX/priv/guest-amd64/toIR.c
+++ b/VEX/priv/guest-amd64/toIR.c
@@ -4839,7 +4839,7 @@ ULong dis_FPU ( /*OUT*/Bool* decode_ok,
              case 0xFC: /* FRNDINT */
                 DIP("frndint\n");
                 put_ST_UNCHECKED(0,
-                  binop(Iop_RoundF64, get_roundingmode(), get_ST(0)) );
+                  binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
                 break;
  
              case 0xFD: /* FSCALE */
diff --git a/VEX/priv/guest-ppc/toIR.c b/VEX/priv/guest-ppc/toIR.c

index 2bb94773441d9647bfd2c3f84f124a0a03dcb754..010e6b7de71f6af8773174d9e1c9f7714de2e67f 100644 (file)
--- a/VEX/priv/guest-ppc/toIR.c
+++ b/VEX/priv/guest-ppc/toIR.c
@@ -54,12 +54,6 @@
     - lvxl,stvxl: load/store with 'least recently used' hint
     - vexptefp, vlogefp
  
-   Floating Point
-   - Single precision stores are rounded twice - once by F64toF32,
-     and then again by the backend for storeBE( F32 ), giving a loss
-     of precision.
-
-
     LIMITATIONS:
  
     Various, including:
@@ -71,6 +65,7 @@
       - All exceptions disabled in FPSCR
       - condition codes not set in FPSCR
       - some error in accuracy
+     - flt->int conversions are dubious in overflow cases
  
     - Altivec floating point:
       - vmaddfp, vnmsubfp
@@ -483,6 +478,11 @@ static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
     return IRExpr_Binop(op, a1, a2);
  }
  
+static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
+{
+   return IRExpr_Triop(op, a1, a2, a3);
+}
+
  static IRExpr* mkexpr ( IRTemp tmp )
  {
     return IRExpr_Tmp(tmp);
@@ -2250,8 +2250,22 @@ static void putGST_masked ( PPC_GST reg, IRExpr* src, UInt mask )
     case PPC_GST_FPSCR: {
        /* Allow writes to Rounding Mode */
        if (mask & 0x3) {
-         stmt( IRStmt_Put( OFFB_FPROUND,
-                           binop(Iop_And32, src, mkU32(0x3)) ));
+         /* construct new fpround from new and old values as per mask:
+            new fpround = (src & (3 & mask)) | (fpround & (3 & ~mask)) */
+         stmt( 
+            IRStmt_Put( 
+               OFFB_FPROUND,
+               binop(
+                  Iop_Or32, 
+                  binop(Iop_And32, src, mkU32(3 & mask)),
+                  binop(
+                     Iop_And32, 
+                     IRExpr_Get(OFFB_FPROUND,Ity_I32),
+                     mkU32(3 & ~mask)
+                  )
+               )
+            )
+         );
        }
  
        /* Give EmWarn for attempted writes to:
@@ -5355,7 +5369,7 @@ static Bool dis_cache_manage ( UInt         theInstr,
     IRRoundingMode.  PPCRoundingMode encoding is different to
     IRRoundingMode, so need to map it.
  */
-static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
+static IRExpr* /* :: Ity_I32 */ get_IR_roundingmode ( void )
  {
  /* 
     rounding mode | PPC | IR
@@ -5369,17 +5383,11 @@ static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
     assign( rm_PPC32, getGST_masked( PPC_GST_FPSCR, MASK_FPSCR_RN ) );
  
     // rm_IR = XOR( rm_PPC32, (rm_PPC32 << 1) & 2)
-   return binop(Iop_Xor32, mkexpr(rm_PPC32),
-                binop(Iop_And32, mkU32(2),
-                      binop(Iop_Shl32, mkexpr(rm_PPC32), mkU8(1))));
-}
-
-/* Round float to single precision
- - returns type Ity_F64 */
-static IRExpr* roundToSgl ( IRExpr* src )
-{
-   return unop(Iop_F32toF64,
-               binop(Iop_F64toF32, get_roundingmode(), src));
+   return binop( Iop_Xor32, 
+                 mkexpr(rm_PPC32),
+                 binop( Iop_And32, 
+                        binop(Iop_Shl32, mkexpr(rm_PPC32), mkU8(1)),
+                        mkU32(2) ));
  }
  
  
@@ -5410,8 +5418,11 @@ static Bool dis_fp_load ( UInt theInstr )
     assign( rA, getIReg(rA_addr) );
     assign( rB, getIReg(rB_addr) );
  
+   /* These are completely straightforward from a rounding and status
+      bits perspective: no rounding involved and no funny status or CR
+      bits affected. */
  
-   switch(opc1) {
+   switch (opc1) {
     case 0x30: // lfs (Load Float Single, PPC32 p441)
        DIP("lfs fr%u,%d(r%u)\n", frD_addr, simm16, rA_addr);
        assign( EA, ea_rAor0_simm(rA_addr, simm16) );
@@ -5420,10 +5431,8 @@ static Bool dis_fp_load ( UInt theInstr )
        break;
  
     case 0x31: // lfsu (Load Float Single, Update, PPC32 p442)
-      if (rA_addr == 0) {
-         vex_printf("dis_fp_load(ppc)(instr,lfsu)\n");
+      if (rA_addr == 0)
           return False;
-      }
        DIP("lfsu fr%u,%d(r%u)\n", frD_addr, simm16, rA_addr);
        assign( EA, ea_rA_simm(rA_addr, simm16) );
        putFReg( frD_addr,
@@ -5438,10 +5447,8 @@ static Bool dis_fp_load ( UInt theInstr )
        break;
  
     case 0x33: // lfdu (Load Float Double, Update, PPC32 p438)
-      if (rA_addr == 0) {
-         vex_printf("dis_fp_load(ppc)(instr,lfdu)\n");
+      if (rA_addr == 0)
           return False;
-      }
        DIP("lfdu fr%u,%d(r%u)\n", frD_addr, simm16, rA_addr);
        assign( EA, ea_rA_simm(rA_addr, simm16) );
        putFReg( frD_addr, loadBE(Ity_F64, mkexpr(EA)) );
@@ -5463,10 +5470,8 @@ static Bool dis_fp_load ( UInt theInstr )
           break;
           
        case 0x237: // lfsux (Load Float Single, Update Indxd, PPC32 p443)
-         if (rA_addr == 0) {
-            vex_printf("dis_fp_load(ppc)(instr,lfsux)\n");
+         if (rA_addr == 0)
              return False;
-         }
           DIP("lfsux fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr);
           assign( EA, ea_rA_idxd(rA_addr, rB_addr) );
           putFReg( frD_addr,
@@ -5481,10 +5486,8 @@ static Bool dis_fp_load ( UInt theInstr )
           break;
           
        case 0x277: // lfdux (Load Float Double, Update Indxd, PPC32 p439)
-         if (rA_addr == 0) {
-            vex_printf("dis_fp_load(ppc)(instr,lfdux)\n");
+         if (rA_addr == 0)
              return False;
-         }
           DIP("lfdux fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr);
           assign( EA, ea_rA_idxd(rA_addr, rB_addr) );
           putFReg( frD_addr, loadBE(Ity_F64, mkexpr(EA)) );
@@ -5531,29 +5534,31 @@ static Bool dis_fp_store ( UInt theInstr )
     assign( rA,  getIReg(rA_addr) );
     assign( rB,  getIReg(rB_addr) );
  
-   switch(opc1) {
+   /* These are straightforward from a status bits perspective: no
+      funny status or CR bits affected.  For single precision stores,
+      the values are truncated and denormalised (not rounded) to turn
+      them into single precision values. */
+
+   switch (opc1) {
  
     case 0x34: // stfs (Store Float Single, PPC32 p518)
        DIP("stfs fr%u,%d(r%u)\n", frS_addr, simm16, rA_addr);
        assign( EA, ea_rAor0_simm(rA_addr, simm16) );
-      /* TODO
-         This implementation ends up rounding twice, losing accuracy.
-         - first via F64toF32, and then by the backend fp store (stfs)
-      */
+      /* Use Iop_TruncF64asF32 to truncate and possibly denormalise
+         the value to be stored in the correct way, without any
+         rounding. */
        storeBE( mkexpr(EA),
-               binop(Iop_F64toF32, get_roundingmode(), mkexpr(frS)) );
+               unop(Iop_TruncF64asF32, mkexpr(frS)) );
        break;
  
     case 0x35: // stfsu (Store Float Single, Update, PPC32 p519)
-      if (rA_addr == 0) {
-         vex_printf("dis_fp_store(ppc)(instr,stfsu)\n");
+      if (rA_addr == 0)
           return False;
-      }
        DIP("stfsu fr%u,%d(r%u)\n", frS_addr, simm16, rA_addr);
        assign( EA, ea_rA_simm(rA_addr, simm16) );
-      /* This implementation loses accuracy - see note for stfs */
+      /* See comment for stfs */
        storeBE( mkexpr(EA),
-               binop(Iop_F64toF32, get_roundingmode(), mkexpr(frS)) );
+               unop(Iop_TruncF64asF32, mkexpr(frS)) );
        putIReg( rA_addr, mkexpr(EA) );
        break;
  
@@ -5564,10 +5569,8 @@ static Bool dis_fp_store ( UInt theInstr )
        break;
  
     case 0x37: // stfdu (Store Float Double, Update, PPC32 p514)
-      if (rA_addr == 0) {
-         vex_printf("dis_fp_store(ppc)(instr,stfdu)\n");
+      if (rA_addr == 0)
           return False;
-      }
        DIP("stfdu fr%u,%d(r%u)\n", frS_addr, simm16, rA_addr);
        assign( EA, ea_rA_simm(rA_addr, simm16) );
        storeBE( mkexpr(EA), mkexpr(frS) );
@@ -5579,26 +5582,23 @@ static Bool dis_fp_store ( UInt theInstr )
           vex_printf("dis_fp_store(ppc)(instr,b0)\n");
           return False;
        }
-
        switch(opc2) {
        case 0x297: // stfsx (Store Float Single Indexed, PPC32 p521)
           DIP("stfsx fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr);
           assign( EA, ea_rAor0_idxd(rA_addr, rB_addr) );
-         /* This implementation loses accuracy - see note for stfs */
-         storeBE( mkexpr(EA), binop(Iop_F64toF32,
-                                    get_roundingmode(), mkexpr(frS)) );
+         /* See note for stfs */
+         storeBE( mkexpr(EA), 
+                  unop(Iop_TruncF64asF32, mkexpr(frS)) );
           break;
           
        case 0x2B7: // stfsux (Store Float Sgl, Update Indxd, PPC32 p520)
-         if (rA_addr == 0) {
-            vex_printf("dis_fp_store(ppc)(instr,stfsux)\n");
+         if (rA_addr == 0)
              return False;
-         }
           DIP("stfsux fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr);
           assign( EA, ea_rA_idxd(rA_addr, rB_addr) );
-         /* This implementation loses accuracy - see note for stfs */
-         storeBE( mkexpr(EA), binop(Iop_F64toF32,
-                                    get_roundingmode(), mkexpr(frS)) );
+         /* See note for stfs */
+         storeBE( mkexpr(EA), 
+                  unop(Iop_TruncF64asF32, mkexpr(frS)) );
           putIReg( rA_addr, mkexpr(EA) );
           break;
  
@@ -5609,10 +5609,8 @@ static Bool dis_fp_store ( UInt theInstr )
           break;
           
        case 0x2F7: // stfdux (Store Float Dbl, Update Indxd, PPC32 p515)
-         if (rA_addr == 0) {
-            vex_printf("dis_fp_store(ppc)(instr,stfdux)\n");
+         if (rA_addr == 0)
              return False;
-         }
           DIP("stfdux fr%u,r%u,r%u\n", frS_addr, rA_addr, rB_addr);
           assign( EA, ea_rA_idxd(rA_addr, rB_addr) );
           storeBE( mkexpr(EA), mkexpr(frS) );
@@ -5655,12 +5653,23 @@ static Bool dis_fp_arith ( UInt theInstr )
     UChar frC_addr = ifieldRegC(theInstr);
     UChar opc2     = ifieldOPClo5(theInstr);
     UChar flag_rC  = ifieldBIT0(theInstr);
-   // Note: flag_rC ignored as fp exceptions not supported.
  
-   IRTemp frD = newTemp(Ity_F64);
-   IRTemp frA = newTemp(Ity_F64);
-   IRTemp frB = newTemp(Ity_F64);
-   IRTemp frC = newTemp(Ity_F64);
+   IRTemp  frD = newTemp(Ity_F64);
+   IRTemp  frA = newTemp(Ity_F64);
+   IRTemp  frB = newTemp(Ity_F64);
+   IRTemp  frC = newTemp(Ity_F64);
+   IRExpr* rm  = get_IR_roundingmode();
+
+   /* By default, we will examine the results of the operation and set
+      fpscr[FPRF] accordingly. */
+   Bool set_FPRF = True;
+
+   /* By default, if flag_rC is set, we will clear cr1 after the
+      operation.  In reality we should set cr1 to indicate the
+      exception status of the operation, but since we're not
+      simulating exceptions, the exception status will appear to be
+      zero.  Hence cr1 should be cleared if this is a . form insn. */
+   Bool clear_CR1 = True;
  
     assign( frA, getFReg(frA_addr));
     assign( frB, getFReg(frB_addr));
@@ -5670,84 +5679,71 @@ static Bool dis_fp_arith ( UInt theInstr )
     case 0x3B:
        switch (opc2) {
        case 0x12: // fdivs (Floating Divide Single, PPC32 p407)
-         if (frC_addr != 0) {
-            vex_printf("dis_fp_arith(ppc)(instr,fdivs)\n");
+         if (frC_addr != 0)
              return False;
-         }
           DIP("fdivs%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frA_addr, frB_addr);
-         assign( frD, roundToSgl( binop(Iop_DivF64,
-                                        mkexpr(frA), mkexpr(frB)) ));
+         assign( frD, triop( Iop_DivF64r32, 
+                             rm, mkexpr(frA), mkexpr(frB) ));
           break;
  
        case 0x14: // fsubs (Floating Subtract Single, PPC32 p430)
-         if (frC_addr != 0) {
-            vex_printf("dis_fp_arith(ppc)(instr,fsubs)\n");
+         if (frC_addr != 0)
              return False;
-         }
           DIP("fsubs%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frA_addr, frB_addr);
-         assign( frD, roundToSgl( 
-                         binop(Iop_SubF64, mkexpr(frA), mkexpr(frB)) ));
+         assign( frD, triop( Iop_SubF64r32, 
+                             rm, mkexpr(frA), mkexpr(frB) ));
           break;
  
        case 0x15: // fadds (Floating Add Single, PPC32 p401)
-         if (frC_addr != 0) {
-            vex_printf("dis_fp_arith(ppc)(instr,fadds)\n");
+         if (frC_addr != 0)
              return False;
-         }
           DIP("fadds%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frA_addr, frB_addr);
-         assign( frD, roundToSgl( 
-                         binop(Iop_AddF64, mkexpr(frA), mkexpr(frB)) ));
+         assign( frD, triop( Iop_AddF64r32, 
+                             rm, mkexpr(frA), mkexpr(frB) ));
           break;
  
        case 0x16: // fsqrts (Floating SqRt (Single-Precision), PPC32 p428)
           // NOTE: POWERPC OPTIONAL, "General-Purpose Group" (PPC32_FX)
-         if (frA_addr != 0 || frC_addr != 0) {
-            vex_printf("dis_fp_arith(ppc)(instr,fsqrts)\n");
+         if (frA_addr != 0 || frC_addr != 0)
              return False;
-         }
           DIP("fsqrts%s fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frB_addr);
           // however illogically, on ppc970 this insn behaves identically
-         // to fsqrt (double-precision).  So don't do round-to-single.
-         assign( frD, unop(Iop_SqrtF64, mkexpr(frB)) );
+         // to fsqrt (double-precision).  So use SqrtF64, not SqrtF64r32.
+         assign( frD, binop( Iop_SqrtF64, rm, mkexpr(frB) ));
           break;
  
        case 0x18: // fres (Floating Reciprocal Estimate Single, PPC32 p421)
           // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX)
-         if (frA_addr != 0 || frC_addr != 0) {
-            vex_printf("dis_fp_arith(ppc)(instr,fres)\n");
+         if (frA_addr != 0 || frC_addr != 0)
              return False;
-         }
           DIP("fres%s fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frB_addr);
-         //assign( frD, unop(Iop_Est8FRecip, mkexpr(frB)) );
           { IRExpr* ieee_one
                = IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL));
-           assign( frD, roundToSgl(binop(Iop_DivF64, ieee_one, mkexpr(frB))) );
+           assign( frD, triop( Iop_DivF64r32, 
+                               rm,
+                               ieee_one, mkexpr(frB) ));
           }
           break;
  
        case 0x19: // fmuls (Floating Multiply Single, PPC32 p414)
-         if (frB_addr != 0) {
-            vex_printf("dis_fp_arith(ppc)(instr,fmuls)\n");
+         if (frB_addr != 0)
              return False;
-         }
           DIP("fmuls%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frA_addr, frC_addr);
-         assign( frD, roundToSgl( binop(Iop_MulF64,
-                                        mkexpr(frA), mkexpr(frC)) ));
+         assign( frD, triop( Iop_MulF64r32,
+                             rm, mkexpr(frA), mkexpr(frC) ));
           break;
  
        case 0x1A: // frsqrtes (Floating Recip SqRt Est Single)
           // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX)
           // Undocumented instruction?
-         if (frA_addr != 0 || frC_addr != 0) {
-            vex_printf("dis_fp_arith(ppc)(instr,frsqrte)\n");
+         if (frA_addr != 0 || frC_addr != 0)
              return False;
-         }
           DIP("frsqrtes%s fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frB_addr);
           assign( frD, unop(Iop_Est5FRSqrt, mkexpr(frB)) );
@@ -5762,44 +5758,36 @@ static Bool dis_fp_arith ( UInt theInstr )
     case 0x3F:
        switch (opc2) {           
        case 0x12: // fdiv (Floating Div (Double-Precision), PPC32 p406)
-         if (frC_addr != 0) {
-            vex_printf("dis_fp_arith(ppc)(instr,fdiv)\n");
+         if (frC_addr != 0)
              return False;
-         }
           DIP("fdiv%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frA_addr, frB_addr);
-         assign( frD, binop( Iop_DivF64, mkexpr(frA), mkexpr(frB) ) );
+         assign( frD, triop(Iop_DivF64, rm, mkexpr(frA), mkexpr(frB)) );
           break;
  
        case 0x14: // fsub (Floating Sub (Double-Precision), PPC32 p429)
-         if (frC_addr != 0) {
-            vex_printf("dis_fp_arith(ppc)(instr,fsub)\n");
+         if (frC_addr != 0)
              return False;
-         }
           DIP("fsub%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frA_addr, frB_addr);
-         assign( frD, binop( Iop_SubF64, mkexpr(frA), mkexpr(frB) ) );
+         assign( frD, triop(Iop_SubF64, rm, mkexpr(frA), mkexpr(frB)) );
           break;
  
        case 0x15: // fadd (Floating Add (Double-Precision), PPC32 p400)
-         if (frC_addr != 0) {
-            vex_printf("dis_fp_arith(ppc)(instr,fadd)\n");
+         if (frC_addr != 0)
              return False;
-         }
           DIP("fadd%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frA_addr, frB_addr);
-         assign( frD, binop( Iop_AddF64, mkexpr(frA), mkexpr(frB) ) );
+         assign( frD, triop(Iop_AddF64, rm, mkexpr(frA), mkexpr(frB)) );
           break;
  
        case 0x16: // fsqrt (Floating SqRt (Double-Precision), PPC32 p427)
           // NOTE: POWERPC OPTIONAL, "General-Purpose Group" (PPC32_FX)
-         if (frA_addr != 0 || frC_addr != 0) {
-            vex_printf("dis_fp_arith(ppc)(instr,fsqrt)\n");
+         if (frA_addr != 0 || frC_addr != 0)
              return False;
-         }
           DIP("fsqrt%s fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frB_addr);
-         assign( frD, unop( Iop_SqrtF64, mkexpr(frB) ) );
+         assign( frD, binop(Iop_SqrtF64, rm, mkexpr(frB)) );
           break;
  
        case 0x17: { // fsel (Floating Select, PPC32 p426)
@@ -5824,6 +5812,9 @@ static Bool dis_fp_arith ( UInt theInstr )
                           binop(Iop_CmpEQ32, mkexpr(cc_b0), mkU32(0))),
                      mkexpr(frB),
                      mkexpr(frC) ));
+
+         /* One of the rare ones which don't mess with FPRF */
+         set_FPRF = False;
           break;
        }
  
@@ -5831,35 +5822,32 @@ static Bool dis_fp_arith ( UInt theInstr )
           // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX)
           // Note: unclear whether this insn really exists or not
           // ppc970 doesn't have it, but POWER5 does
-         if (frA_addr != 0 || frC_addr != 0) {
-            vex_printf("dis_fp_arith(ppc)(instr,fres)\n");
+         if (frA_addr != 0 || frC_addr != 0)
              return False;
-         }
           DIP("fre%s fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frB_addr);
-         //assign( frD, unop(Iop_Est8FRecip, mkexpr(frB)) );
           { IRExpr* ieee_one
                = IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL));
-           assign( frD, binop(Iop_DivF64, ieee_one, mkexpr(frB)) );
+           /* Does this really depend on the rounding mode?  Play safe
+              and use the default. */
+           assign( frD, triop( Iop_DivF64, 
+                               mkU32(Irrm_NEAREST), 
+                               ieee_one, mkexpr(frB) ));
           }
           break;
  
        case 0x19: // fmul (Floating Mult (Double Precision), PPC32 p413)
-         if (frB_addr != 0) {
-            vex_printf("dis_fp_arith(ppc)(instr,fmul)\n");
+         if (frB_addr != 0)
              return False;
-         }
           DIP("fmul%s fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frA_addr, frC_addr);
-         assign( frD, binop( Iop_MulF64, mkexpr(frA), mkexpr(frC) ) );
+         assign( frD, triop(Iop_MulF64, rm, mkexpr(frA), mkexpr(frC)) );
           break;
  
        case 0x1A: // frsqrte (Floating Recip SqRt Est., PPC32 p424)
           // NOTE: POWERPC OPTIONAL, "Graphics Group" (PPC32_GX)
-         if (frA_addr != 0 || frC_addr != 0) {
-            vex_printf("dis_fp_arith(ppc)(instr,frsqrte)\n");
+         if (frA_addr != 0 || frC_addr != 0)
              return False;
-         }
           DIP("frsqrte%s fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frB_addr);
           assign( frD, unop(Iop_Est5FRSqrt, mkexpr(frB)) );
@@ -5877,6 +5865,17 @@ static Bool dis_fp_arith ( UInt theInstr )
     }
  
     putFReg( frD_addr, mkexpr(frD) );
+
+   if (set_FPRF) {
+      // XXX XXX XXX FIXME
+      // set FPRF from frD
+   }
+
+   if (flag_rC && clear_CR1) {
+      putCR321( 1, mkU8(0) );
+      putCR0( 1, mkU8(0) );
+   }
+
     return True;
  }
  
@@ -5896,56 +5895,81 @@ static Bool dis_fp_multadd ( UInt theInstr )
     UChar opc2     = ifieldOPClo5(theInstr);
     UChar flag_rC  = ifieldBIT0(theInstr);
  
-   IRTemp frD = newTemp(Ity_F64);
-   IRTemp frA = newTemp(Ity_F64);
-   IRTemp frB = newTemp(Ity_F64);
-   IRTemp frC = newTemp(Ity_F64);
+   IRTemp  frD = newTemp(Ity_F64);
+   IRTemp  frA = newTemp(Ity_F64);
+   IRTemp  frB = newTemp(Ity_F64);
+   IRTemp  frC = newTemp(Ity_F64);
+   IRTemp  rmt = newTemp(Ity_I32);
+   IRExpr* rm;
+
+   /* By default, we will examine the results of the operation and set
+      fpscr[FPRF] accordingly. */
+   Bool set_FPRF = True;
+
+   /* By default, if flag_rC is set, we will clear cr1 after the
+      operation.  In reality we should set cr1 to indicate the
+      exception status of the operation, but since we're not
+      simulating exceptions, the exception status will appear to be
+      zero.  Hence cr1 should be cleared if this is a . form insn. */
+   Bool clear_CR1 = True;
+
+   /* Bind the rounding mode expression to a temp; there's no
+      point in creating gratuitous CSEs, as we know we'll need 
+      to use it twice. */
+   assign( rmt, get_IR_roundingmode() );
+   rm = mkexpr(rmt);
  
     assign( frA, getFReg(frA_addr));
     assign( frB, getFReg(frB_addr));
     assign( frC, getFReg(frC_addr));
  
+   /* The rounding in this is all a bit dodgy.  The idea is to only do
+      one rounding.  That clearly isn't achievable without dedicated
+      four-input IR primops, although in the single precision case we
+      can sort-of simulate it by doing the inner multiply in double
+      precision. 
+
+      In the negated cases, the negation happens after rounding. */
+
     switch (opc1) {
     case 0x3B:
        switch (opc2) {
        case 0x1C: // fmsubs (Floating Mult-Subtr Single, PPC32 p412)
           DIP("fmsubs%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frA_addr, frC_addr, frB_addr);
-         assign( frD, roundToSgl( binop( Iop_SubF64,
-                                         binop(Iop_MulF64, mkexpr(frA),
-                                                           mkexpr(frC)),
-                                         mkexpr(frB)) ));
-          break;
+         assign( frD, triop( Iop_SubF64r32, rm,
+                             triop( Iop_MulF64, rm, mkexpr(frA),
+                                                    mkexpr(frC) ),
+                             mkexpr(frB) ));
+         break;
  
        case 0x1D: // fmadds (Floating Mult-Add Single, PPC32 p409)
           DIP("fmadds%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frA_addr, frC_addr, frB_addr);
-         assign( frD, roundToSgl( binop( Iop_AddF64,
-                                         binop(Iop_MulF64, mkexpr(frA),
-                                                           mkexpr(frC)),
-                                         mkexpr(frB)) ));
+         assign( frD, triop( Iop_AddF64r32, rm,
+                             triop( Iop_MulF64, rm, mkexpr(frA),
+                                                    mkexpr(frC) ),
+                             mkexpr(frB) ));
           break;
  
        case 0x1E: // fnmsubs (Float Neg Mult-Subtr Single, PPC32 p420)
           DIP("fnmsubs%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frA_addr, frC_addr, frB_addr);
-         assign( frD, roundToSgl(
-                            unop(Iop_NegF64,
-                                 binop(Iop_SubF64,
-                                       binop(Iop_MulF64, mkexpr(frA),
-                                                         mkexpr(frC)),
-                                       mkexpr(frB))) ));
+         assign( frD, unop( Iop_NegF64,
+                      triop( Iop_SubF64r32, rm,
+                             triop( Iop_MulF64, rm, mkexpr(frA),
+                                                    mkexpr(frC) ),
+                             mkexpr(frB) )));
           break;
  
        case 0x1F: // fnmadds (Floating Negative Multiply-Add Single, PPC32 p418)
           DIP("fnmadds%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frA_addr, frC_addr, frB_addr);
-         assign( frD, roundToSgl(
-                            unop(Iop_NegF64,
-                                 binop(Iop_AddF64,
-                                       binop(Iop_MulF64, mkexpr(frA),
-                                                         mkexpr(frC)),
-                                       mkexpr(frB))) ));
+         assign( frD, unop( Iop_NegF64,
+                      triop( Iop_AddF64r32, rm,
+                             triop( Iop_MulF64, rm, mkexpr(frA),
+                                                    mkexpr(frC) ),
+                             mkexpr(frB) )));
           break;
  
        default:
@@ -5959,18 +5983,18 @@ static Bool dis_fp_multadd ( UInt theInstr )
        case 0x1C: // fmsub (Float Mult-Sub (Dbl Precision), PPC32 p411)
           DIP("fmsub%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frA_addr, frC_addr, frB_addr);
-         assign( frD, binop( Iop_SubF64,
-                             binop( Iop_MulF64, mkexpr(frA),
-                                                mkexpr(frC) ),
+         assign( frD, triop( Iop_SubF64, rm,
+                             triop( Iop_MulF64, rm, mkexpr(frA),
+                                                    mkexpr(frC) ),
                               mkexpr(frB) ));
           break;
  
        case 0x1D: // fmadd (Float Mult-Add (Dbl Precision), PPC32 p408)
           DIP("fmadd%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frA_addr, frC_addr, frB_addr);
-         assign( frD, binop( Iop_AddF64,
-                             binop( Iop_MulF64, mkexpr(frA),
-                                                mkexpr(frC) ),
+         assign( frD, triop( Iop_AddF64, rm,
+                             triop( Iop_MulF64, rm, mkexpr(frA),
+                                                    mkexpr(frC) ),
                               mkexpr(frB) ));
           break;
  
@@ -5978,20 +6002,20 @@ static Bool dis_fp_multadd ( UInt theInstr )
           DIP("fnmsub%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frA_addr, frC_addr, frB_addr);
           assign( frD, unop( Iop_NegF64,
-                            binop( Iop_SubF64,
-                                   binop( Iop_MulF64, mkexpr(frA),
-                                                      mkexpr(frC) ),
-                                   mkexpr(frB) )));
+                      triop( Iop_SubF64, rm,
+                             triop( Iop_MulF64, rm, mkexpr(frA),
+                                                    mkexpr(frC) ),
+                             mkexpr(frB) )));
           break;
  
        case 0x1F: // fnmadd (Float Neg Mult-Add (Dbl Precision), PPC32 p417)
           DIP("fnmadd%s fr%u,fr%u,fr%u,fr%u\n", flag_rC ? ".":"",
               frD_addr, frA_addr, frC_addr, frB_addr);
           assign( frD, unop( Iop_NegF64,
-                            binop( Iop_AddF64,
-                                   binop( Iop_MulF64, mkexpr(frA),
-                                                      mkexpr(frC) ),
-                                   mkexpr(frB) )));
+                      triop( Iop_AddF64, rm,
+                             triop( Iop_MulF64, rm, mkexpr(frA),
+                                                    mkexpr(frC) ),
+                             mkexpr(frB) )));
           break;
  
        default:
@@ -6006,6 +6030,17 @@ static Bool dis_fp_multadd ( UInt theInstr )
     }
  
     putFReg( frD_addr, mkexpr(frD) );
+
+   if (set_FPRF) {
+      // XXX XXX XXX FIXME
+      // set FPRF from frD
+   }
+
+   if (flag_rC && clear_CR1) {
+      putCR321( 1, mkU8(0) );
+      putCR0( 1, mkU8(0) );
+   }
+
     return True;
  }
  
@@ -6051,18 +6086,37 @@ static Bool dis_fp_cmp ( UInt theInstr )
       LT            | 0x8 | 0x01
     */
  
-   // ccPPC32 = Shl(1, (0x2 & ~(ccIR>>5)) || (0x1 & (XOR(ccIR, ccIR>>6))))
+   // ccPPC32 = Shl(1, (~(ccIR>>5) & 2) 
+   //                    | ((ccIR ^ (ccIR>>6)) & 1))
     assign(
        ccPPC32,
-      binop(Iop_Shl32, mkU32(1),
-            unop(Iop_32to8, 
-                 binop(Iop_Or32,
-                       binop(Iop_And32, mkU32(2),
-                             unop(Iop_Not32,
-                                  binop(Iop_Shr32, mkexpr(ccIR), mkU8(5)))),
-                       binop(Iop_And32, mkU32(1),
-                             binop(Iop_Xor32, mkexpr(ccIR),
-                                   binop(Iop_Shr32, mkexpr(ccIR), mkU8(6)))))))
+      binop(
+         Iop_Shl32, 
+         mkU32(1),
+         unop(
+            Iop_32to8, 
+            binop(
+               Iop_Or32,
+               binop(
+                  Iop_And32, 
+                  unop(
+                     Iop_Not32,
+                     binop(Iop_Shr32, mkexpr(ccIR), mkU8(5))
+                  ),
+                  mkU32(2)
+               ),
+               binop(
+                  Iop_And32, 
+                  binop(
+                     Iop_Xor32, 
+                     mkexpr(ccIR),
+                     binop(Iop_Shr32, mkexpr(ccIR), mkU8(6))
+                  ),
+                  mkU32(1)
+               )
+            )
+         )
+      )
     );
  
     putGST_field( PPC_GST_CR, mkexpr(ccPPC32), crfD );
@@ -6070,6 +6124,8 @@ static Bool dis_fp_cmp ( UInt theInstr )
     /* CAB: TODO?: Support writing cc to FPSCR->FPCC ?
        putGST_field( PPC_GST_FPSCR, mkexpr(ccPPC32), 4 );
     */
+   // XXX XXX XXX FIXME
+   // Also write the result into FPRF (it's not entirely clear how)
  
     /* Note: Differences between fcmpu and fcmpo are only in exception
        flag settings, which aren't supported anyway. */
@@ -6102,11 +6158,23 @@ static Bool dis_fp_round ( UInt theInstr )
     UInt  opc2     = ifieldOPClo10(theInstr);
     UChar flag_rC  = ifieldBIT0(theInstr);
  
-   IRTemp frD = newTemp(Ity_F64);
-   IRTemp frB = newTemp(Ity_F64);
-   IRTemp r_tmp32 = newTemp(Ity_I32);
-   IRTemp r_tmp64 = newTemp(Ity_I64);
-
+   IRTemp  frD     = newTemp(Ity_F64);
+   IRTemp  frB     = newTemp(Ity_F64);
+   IRTemp  r_tmp32 = newTemp(Ity_I32);
+   IRTemp  r_tmp64 = newTemp(Ity_I64);
+   IRExpr* rm      = get_IR_roundingmode();
+
+   /* By default, we will examine the results of the operation and set
+      fpscr[FPRF] accordingly. */
+   Bool set_FPRF = True;
+
+   /* By default, if flag_rC is set, we will clear cr1 after the
+      operation.  In reality we should set cr1 to indicate the
+      exception status of the operation, but since we're not
+      simulating exceptions, the exception status will appear to be
+      zero.  Hence cr1 should be cleared if this is a . form insn. */
+   Bool clear_CR1 = True;
+   
     if (opc1 != 0x3F || b16to20 != 0) {
        vex_printf("dis_fp_round(ppc)(instr)\n");
        return False;
@@ -6117,42 +6185,52 @@ static Bool dis_fp_round ( UInt theInstr )
     switch (opc2) {
     case 0x00C: // frsp (Float Round to Single, PPC32 p423)
        DIP("frsp%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
-      assign( frD, roundToSgl( mkexpr(frB) ));
+      assign( frD, binop( Iop_RoundF64toF32, rm, mkexpr(frB) ));
        break;
        
     case 0x00E: // fctiw (Float Conv to Int, PPC32 p404)
        DIP("fctiw%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
        assign( r_tmp32,
-              binop(Iop_F64toI32, get_roundingmode(), mkexpr(frB)) );
+              binop(Iop_F64toI32, rm, mkexpr(frB)) );
        assign( frD, unop( Iop_ReinterpI64asF64,
                           unop( Iop_32Uto64, mkexpr(r_tmp32))));
+      /* FPRF is undefined after fctiw.  Leave unchanged. */
+      set_FPRF = False;
        break;
        
     case 0x00F: // fctiwz (Float Conv to Int, Round to Zero, PPC32 p405)
        DIP("fctiwz%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
-      assign( r_tmp32, binop(Iop_F64toI32, mkU32(Irrm_ZERO), mkexpr(frB)) );
+      assign( r_tmp32, 
+              binop(Iop_F64toI32, mkU32(Irrm_ZERO), mkexpr(frB) ));
        assign( frD, unop( Iop_ReinterpI64asF64,
                           unop( Iop_32Uto64, mkexpr(r_tmp32))));
+      /* FPRF is undefined after fctiwz.  Leave unchanged. */
+      set_FPRF = False;
        break;
  
     case 0x32E: // fctid (Float Conv to Int DWord, PPC64 p437)
        DIP("fctid%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
        assign( r_tmp64,
-              binop(Iop_F64toI64, get_roundingmode(), mkexpr(frB)) );
+              binop(Iop_F64toI64, rm, mkexpr(frB)) );
        assign( frD, unop( Iop_ReinterpI64asF64, mkexpr(r_tmp64)) );
+      /* FPRF is undefined after fctid.  Leave unchanged. */
+      set_FPRF = False;
        break;
  
     case 0x32F: // fctidz (Float Conv to Int DWord, Round to Zero, PPC64 p437)
        DIP("fctidz%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
-      assign( r_tmp64, binop(Iop_F64toI64, mkU32(Irrm_ZERO), mkexpr(frB)) );
+      assign( r_tmp64, 
+              binop(Iop_F64toI64, mkU32(Irrm_ZERO), mkexpr(frB)) );
        assign( frD, unop( Iop_ReinterpI64asF64, mkexpr(r_tmp64)) );
+      /* FPRF is undefined after fctidz.  Leave unchanged. */
+      set_FPRF = False;
        break;
  
     case 0x34E: // fcfid (Float Conv from Int DWord, PPC64 p434)
        DIP("fcfid%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
        assign( r_tmp64, unop( Iop_ReinterpF64asI64, mkexpr(frB)) );
-      assign( frD, binop(Iop_I64toF64, get_roundingmode(),
-                                       mkexpr(r_tmp64)) );
+      assign( frD, 
+              binop(Iop_I64toF64, rm, mkexpr(r_tmp64)) );
        break;
  
     default:
@@ -6161,6 +6239,17 @@ static Bool dis_fp_round ( UInt theInstr )
     }
  
     putFReg( frD_addr, mkexpr(frD) );
+
+   if (set_FPRF) {
+      // XXX XXX XXX FIXME
+      // set FPRF from frD
+   }
+
+   if (flag_rC && clear_CR1) {
+      putCR321( 1, mkU8(0) );
+      putCR0( 1, mkU8(0) );
+   }
+
     return True;
  }
  
@@ -6216,6 +6305,15 @@ static Bool dis_fp_move ( UInt theInstr )
     }
  
     putFReg( frD_addr, mkexpr(frD) );
+
+   /* None of these change FPRF.  cr1 is set in the usual way though,
+      if flag_rC is set. */
+
+   if (flag_rC) {
+      putCR321( 1, mkU8(0) );
+      putCR0( 1, mkU8(0) );
+   }
+
     return True;
  }
  
@@ -9160,7 +9258,7 @@ DisResult disInstr_PPC_WRK (
     decode_noFX:
        vassert(!allow_FX);
        vex_printf("disInstr(ppc): "
-                 "declined to decode an GeneralPurpose-Optional insn.\n");
+                 "declined to decode a GeneralPurpose-Optional insn.\n");
        goto decode_failure;
     decode_noGX:
        vassert(!allow_GX);
diff --git a/VEX/priv/guest-x86/toIR.c b/VEX/priv/guest-x86/toIR.c

index 89bd4b1ced6fae14207746b5b8f50a0ccde6ad85..2a0c20918aa84e2bc10e0d722498ec3fc22f6fd6 100644 (file)
--- a/VEX/priv/guest-x86/toIR.c
+++ b/VEX/priv/guest-x86/toIR.c
@@ -3992,7 +3992,7 @@ UInt dis_FPU ( Bool* decode_ok, UChar sorb, Int delta )
              case 0xFC: /* FRNDINT */
                 DIP("frndint\n");
                 put_ST_UNCHECKED(0,
-                  binop(Iop_RoundF64, get_roundingmode(), get_ST(0)) );
+                  binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
                 break;
  
              case 0xFD: /* FSCALE */
diff --git a/VEX/priv/host-amd64/isel.c b/VEX/priv/host-amd64/isel.c

index 0fd869c6de77c58bf22043500f954119fec6edc6..4d31e00ca3b0bde1d6ec7e42c98c002be4a9e695 100644 (file)
--- a/VEX/priv/host-amd64/isel.c
+++ b/VEX/priv/host-amd64/isel.c
@@ -2850,7 +2850,7 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
  //..       }
  //..    }
  
-   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64) {
+   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
        AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
        HReg        arg    = iselDblExpr(env, e->Iex.Binop.arg2);
        HReg        dst    = newVRegV(env);
diff --git a/VEX/priv/host-ppc/hdefs.c b/VEX/priv/host-ppc/hdefs.c

index 4058e8a7b33990c79c78e2b755a94e6eaf40ca91..3633fa7b9ea8f0e5d30453ddf7aff2d3e299c30f 100644 (file)
--- a/VEX/priv/host-ppc/hdefs.c
+++ b/VEX/priv/host-ppc/hdefs.c
@@ -612,10 +612,14 @@ HChar* showPPCShftOp ( PPCShftOp op, Bool immR, Bool sz32 ) {
  
  HChar* showPPCFpOp ( PPCFpOp op ) {
     switch (op) {
-      case Pfp_ADD:    return "fadd";
-      case Pfp_SUB:    return "fsub";
-      case Pfp_MUL:    return "fmul";
-      case Pfp_DIV:    return "fdiv";
+      case Pfp_ADDD:   return "fadd";
+      case Pfp_SUBD:   return "fsub";
+      case Pfp_MULD:   return "fmul";
+      case Pfp_DIVD:   return "fdiv";
+      case Pfp_ADDS:   return "fadds";
+      case Pfp_SUBS:   return "fsubs";
+      case Pfp_MULS:   return "fmuls";
+      case Pfp_DIVS:   return "fdivs";
        case Pfp_SQRT:   return "fsqrt";
        case Pfp_ABS:    return "fabs";
        case Pfp_NEG:    return "fneg";
@@ -3155,18 +3159,30 @@ Int emit_PPCInstr ( UChar* buf, Int nbuf, PPCInstr* i,
        UInt fr_srcL = fregNo(i->Pin.FpBinary.srcL);
        UInt fr_srcR = fregNo(i->Pin.FpBinary.srcR);
        switch (i->Pin.FpBinary.op) {
-      case Pfp_ADD:   // fadd, PPC32 p400
+      case Pfp_ADDD:   // fadd, PPC32 p400
           p = mkFormA( p, 63, fr_dst, fr_srcL, fr_srcR, 0, 21, 0 );
           break;
-      case Pfp_SUB:   // fsub, PPC32 p429
+      case Pfp_ADDS:   // fadds, PPC32 p401
+         p = mkFormA( p, 59, fr_dst, fr_srcL, fr_srcR, 0, 21, 0 );
+         break;
+      case Pfp_SUBD:   // fsub, PPC32 p429
           p = mkFormA( p, 63, fr_dst, fr_srcL, fr_srcR, 0, 20, 0 );
           break;
-      case Pfp_MUL:   // fmul, PPC32 p413
+      case Pfp_SUBS:   // fsubs, PPC32 p430
+         p = mkFormA( p, 59, fr_dst, fr_srcL, fr_srcR, 0, 20, 0 );
+         break;
+      case Pfp_MULD:   // fmul, PPC32 p413
           p = mkFormA( p, 63, fr_dst, fr_srcL, 0, fr_srcR, 25, 0 );
           break;
-      case Pfp_DIV:   // fdiv, PPC32 p406
+      case Pfp_MULS:   // fmuls, PPC32 p414
+         p = mkFormA( p, 59, fr_dst, fr_srcL, 0, fr_srcR, 25, 0 );
+         break;
+      case Pfp_DIVD:   // fdiv, PPC32 p406
           p = mkFormA( p, 63, fr_dst, fr_srcL, fr_srcR, 0, 18, 0 );
           break;
+      case Pfp_DIVS:   // fdivs, PPC32 p407
+         p = mkFormA( p, 59, fr_dst, fr_srcL, fr_srcR, 0, 18, 0 );
+         break;
        default:
           goto bad;
        }
diff --git a/VEX/priv/host-ppc/hdefs.h b/VEX/priv/host-ppc/hdefs.h

index ff2bdaae55e755dddbd88f449326ecfde1a1c8ad..7a8b1aa79bd3aeea056c732db8175e4de3a2851a 100644 (file)
--- a/VEX/priv/host-ppc/hdefs.h
+++ b/VEX/priv/host-ppc/hdefs.h
@@ -368,7 +368,8 @@ typedef
     enum {
        Pfp_INVALID,
        /* Binary */
-      Pfp_ADD, Pfp_SUB, Pfp_MUL, Pfp_DIV, 
+      Pfp_ADDD, Pfp_SUBD, Pfp_MULD, Pfp_DIVD, 
+      Pfp_ADDS, Pfp_SUBS, Pfp_MULS, Pfp_DIVS, 
  
        /* Unary */
        Pfp_SQRT, Pfp_ABS, Pfp_NEG, Pfp_MOV, Pfp_RES, Pfp_RSQRTE
diff --git a/VEX/priv/host-ppc/isel.c b/VEX/priv/host-ppc/isel.c

index 806919c739c9aaab7b605bfb3cc94d500211f89b..0477d4aaffdaa5c257a088392a3a204f463c5065 100644 (file)
--- a/VEX/priv/host-ppc/isel.c
+++ b/VEX/priv/host-ppc/isel.c
@@ -188,8 +188,11 @@ static IRExpr* bind ( Int binder )
      - A Bool to tell us if the host is 32 or 64bit.
        This is set at the start and does not change.
   
-    Note, this is mostly host-independent.
-    (JRS 20050201: well, kinda...  Compare with ISelEnv for amd64.)
+    - An IRExpr*, which may be NULL, holding the IR expression (an
+      IRRoundingMode-encoded value) to which the FPU's rounding mode
+      was most recently set.  Setting to NULL is always safe.  Used to
+      avoid redundant settings of the FPU's rounding mode, as
+      described in set_FPU_rounding_mode below.
  */
   
  typedef
@@ -210,6 +213,8 @@ typedef
        UInt         hwcaps;
  
        Bool         mode64;
+
+      IRExpr*      previous_rm;
     }
     ISelEnv;
   
@@ -359,7 +364,7 @@ static PPCInstr* mk_iMOVds_RR ( HReg r_dst, HReg r_src )
     return PPCInstr_Alu(Palu_OR, r_dst, r_src, PPCRH_Reg(r_src));
  }
  
-/* Advance/retreat %sp by n. */
+/* Advance/retreat %r1 by n. */
  
  static void add_to_sp ( ISelEnv* env, UInt n )
  {
@@ -464,6 +469,73 @@ static PPCAMode* advance4 ( ISelEnv* env, PPCAMode* am )
     return am4;
  }
  
+
+/* Given a guest-state array descriptor, an index expression and a
+   bias, generate a PPCAMode pointing at the relevant piece of 
+   guest state.  Only needed in 64-bit mode. */
+static
+PPCAMode* genGuestArrayOffset ( ISelEnv* env, IRArray* descr,
+                                IRExpr* off, Int bias )
+{
+   HReg rtmp, roff;
+   Int  elemSz = sizeofIRType(descr->elemTy);
+   Int  nElems = descr->nElems;
+   Int  shift  = 0;
+
+   vassert(env->mode64);
+
+   /* Throw out any cases we don't need.  In theory there might be a
+      day where we need to handle others, but not today. */
+
+   if (nElems != 16 && nElems != 32)
+      vpanic("genGuestArrayOffset(ppc64 host)(1)");
+
+   switch (elemSz) {
+      case 8:  shift = 3; break;
+      default: vpanic("genGuestArrayOffset(ppc64 host)(2)");
+   }
+
+   if (bias < -100 || bias > 100) /* somewhat arbitrarily */
+      vpanic("genGuestArrayOffset(ppc64 host)(3)");
+   if (descr->base < 0 || descr->base > 2000) /* somewhat arbitrarily */
+     vpanic("genGuestArrayOffset(ppc64 host)(4)");
+
+   /* Compute off into a reg, %off.  Then return:
+
+         addi %tmp, %off, bias (if bias != 0)
+         andi %tmp, nElems-1
+         sldi %tmp, shift
+         addi %tmp, %tmp, base
+         ... Baseblockptr + %tmp ...
+   */
+   roff = iselWordExpr_R(env, off);
+   rtmp = newVRegI(env);
+   addInstr(env, PPCInstr_Alu(
+                    Palu_ADD, 
+                    rtmp, roff, 
+                    PPCRH_Imm(True/*signed*/, toUShort(bias))));
+   addInstr(env, PPCInstr_Alu(
+                    Palu_AND, 
+                    rtmp, rtmp, 
+                    PPCRH_Imm(False/*unsigned*/, toUShort(nElems-1))));
+   addInstr(env, PPCInstr_Shft(
+                    Pshft_SHL, 
+                    False/*64-bit shift*/,
+                    rtmp, rtmp, 
+                    PPCRH_Imm(False/*unsigned*/, toUShort(shift))));
+   addInstr(env, PPCInstr_Alu(
+                    Palu_ADD, 
+                    rtmp, rtmp, 
+                    PPCRH_Imm(True/*signed*/, toUShort(descr->base))));
+   return
+      PPCAMode_RR( GuestStatePtr(env->mode64), rtmp );
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Function call helpers                       ---*/
+/*---------------------------------------------------------*/
+
  /* Used only in doHelperCall.  See big comment in doHelperCall re
     handling of register-parameter args.  This function figures out
     whether evaluation of an expression might require use of a fixed
@@ -715,126 +787,92 @@ void doHelperCall ( ISelEnv* env,
  }
  
  
-/* Given a guest-state array descriptor, an index expression and a
-   bias, generate a PPCAMode pointing at the relevant piece of 
-   guest state.  Only needed in 64-bit mode. */
-static
-PPCAMode* genGuestArrayOffset ( ISelEnv* env, IRArray* descr,
-                                IRExpr* off, Int bias )
-{
-   HReg rtmp, roff;
-   Int  elemSz = sizeofIRType(descr->elemTy);
-   Int  nElems = descr->nElems;
-   Int  shift  = 0;
-
-   vassert(env->mode64);
-
-   /* Throw out any cases we don't need.  In theory there might be a
-      day where we need to handle others, but not today. */
-
-   if (nElems != 16 && nElems != 32)
-      vpanic("genGuestArrayOffset(ppc64 host)(1)");
-
-   switch (elemSz) {
-      case 8:  shift = 3; break;
-      default: vpanic("genGuestArrayOffset(ppc64 host)(2)");
-   }
-
-   if (bias < -100 || bias > 100) /* somewhat arbitrarily */
-      vpanic("genGuestArrayOffset(ppc64 host)(3)");
-   if (descr->base < 0 || descr->base > 2000) /* somewhat arbitrarily */
-     vpanic("genGuestArrayOffset(ppc64 host)(4)");
-
-   /* Compute off into a reg, %off.  Then return:
-
-         addi %tmp, %off, bias (if bias != 0)
-         andi %tmp, nElems-1
-         sldi %tmp, shift
-         addi %tmp, %tmp, base
-         ... Baseblockptr + %tmp ...
-   */
-   roff = iselWordExpr_R(env, off);
-   rtmp = newVRegI(env);
-   addInstr(env, PPCInstr_Alu(
-                    Palu_ADD, 
-                    rtmp, roff, 
-                    PPCRH_Imm(True/*signed*/, toUShort(bias))));
-   addInstr(env, PPCInstr_Alu(
-                    Palu_AND, 
-                    rtmp, rtmp, 
-                    PPCRH_Imm(False/*signed*/, toUShort(nElems-1))));
-   addInstr(env, PPCInstr_Shft(
-                    Pshft_SHL, 
-                    False/*64-bit shift*/,
-                    rtmp, rtmp, 
-                    PPCRH_Imm(False/*unsigned*/, toUShort(shift))));
-   addInstr(env, PPCInstr_Alu(
-                    Palu_ADD, 
-                    rtmp, rtmp, 
-                    PPCRH_Imm(True/*signed*/, toUShort(descr->base))));
-   return
-      PPCAMode_RR( GuestStatePtr(env->mode64), rtmp );
-}
-
-
+/*---------------------------------------------------------*/
+/*--- ISEL: FP rounding mode helpers                    ---*/
+/*---------------------------------------------------------*/
  
-/* Set FPU's rounding mode to the default */
-static 
-void set_FPU_rounding_default ( ISelEnv* env )
-{
-   HReg fr_src = newVRegF(env);
-   HReg r_src  = newVRegI(env);
-
-   /* Default rounding mode = 0x0
-      Only supporting the rounding-mode bits - the rest of FPSCR is 0x0
-       - so we can set the whole register at once (faster)
-      note: upper 32 bits ignored by FpLdFPSCR
-   */
-   addInstr(env, PPCInstr_LI(r_src, 0x0, env->mode64));
-   if (env->mode64) {
-      fr_src = mk_LoadR64toFPR( env, r_src );         // 1*I64 -> F64
-   } else {
-      fr_src = mk_LoadRR32toFPR( env, r_src, r_src ); // 2*I32 -> F64
-   }
-   addInstr(env, PPCInstr_FpLdFPSCR( fr_src ));
-}
+///* Set FPU's rounding mode to the default */
+//static 
+//void set_FPU_rounding_default ( ISelEnv* env )
+//{
+//   HReg fr_src = newVRegF(env);
+//   HReg r_src  = newVRegI(env);
+//
+//   /* Default rounding mode = 0x0
+//      Only supporting the rounding-mode bits - the rest of FPSCR is 0x0
+//       - so we can set the whole register at once (faster)
+//      note: upper 32 bits ignored by FpLdFPSCR
+//   */
+//   addInstr(env, PPCInstr_LI(r_src, 0x0, env->mode64));
+//   if (env->mode64) {
+//      fr_src = mk_LoadR64toFPR( env, r_src );         // 1*I64 -> F64
+//   } else {
+//      fr_src = mk_LoadRR32toFPR( env, r_src, r_src ); // 2*I32 -> F64
+//   }
+//   addInstr(env, PPCInstr_FpLdFPSCR( fr_src ));
+//}
  
  /* Convert IR rounding mode to PPC encoding */
  static HReg roundModeIRtoPPC ( ISelEnv* env, HReg r_rmIR )
  {
-/* 
+   /* 
     rounding mode | PPC | IR
     ------------------------
     to nearest    | 00  | 00
     to zero       | 01  | 11
     to +infinity  | 10  | 10
     to -infinity  | 11  | 01
-*/
+   */
     HReg r_rmPPC = newVRegI(env);
-   HReg r_tmp   = newVRegI(env);
+   HReg r_tmp1  = newVRegI(env);
  
     vassert(hregClass(r_rmIR) == HRcGPR(env->mode64));
  
-   // AND r_rmIR,3   -- shouldn't be needed; paranoia
-   addInstr(env, PPCInstr_Alu( Palu_AND, r_rmIR, r_rmIR,
-                               PPCRH_Imm(False,3) ));
+   // r_rmPPC = XOR(r_rmIR, r_rmIR << 1) & 3
+   //
+   // slwi  tmp1,    r_rmIR, 1
+   // xor   tmp1,    r_rmIR, tmp1
+   // andi  r_rmPPC, tmp1, 3
  
-   // r_rmPPC = XOR( r_rmIR, (r_rmIR << 1) & 2)
     addInstr(env, PPCInstr_Shft(Pshft_SHL, True/*32bit shift*/,
-                               r_tmp, r_rmIR, PPCRH_Imm(False,1)));
-   addInstr(env, PPCInstr_Alu( Palu_AND, r_tmp, r_tmp,
-                               PPCRH_Imm(False,2) ));
-   addInstr(env, PPCInstr_Alu( Palu_XOR, r_rmPPC, r_rmIR,
-                               PPCRH_Reg(r_tmp) ));
+                               r_tmp1, r_rmIR, PPCRH_Imm(False,1)));
+
+   addInstr(env, PPCInstr_Alu( Palu_XOR, r_tmp1, r_rmIR,
+                               PPCRH_Reg(r_tmp1) ));
+
+   addInstr(env, PPCInstr_Alu( Palu_AND, r_rmPPC, r_tmp1,
+                               PPCRH_Imm(False,3) ));
+
     return r_rmPPC;
  }
  
  
-/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
-   expression denoting a value in the range 0 .. 3, indicating a round
-   mode encoded as per type IRRoundingMode.  Set the PPC FPSCR to have
-   the same rounding.
+/* Set the FPU's rounding mode: 'mode' is an I32-typed expression
+   denoting a value in the range 0 .. 3, indicating a round mode
+   encoded as per type IRRoundingMode.  Set the PPC FPSCR to have the
+   same rounding.
+
     For speed & simplicity, we're setting the *entire* FPSCR here.
+
+   Setting the rounding mode is expensive.  So this function tries to
+   avoid repeatedly setting the rounding mode to the same thing by
+   first comparing 'mode' to the 'mode' tree supplied in the previous
+   call to this function, if any.  (The previous value is stored in
+   env->previous_rm.)  If 'mode' is a single IR temporary 't' and
+   env->previous_rm is also just 't', then the setting is skipped.
+
+   This is safe because of the SSA property of IR: an IR temporary can
+   only be defined once and so will have the same value regardless of
+   where it appears in the block.  Cool stuff, SSA.
+
+   A safety condition: all attempts to set the RM must be aware of
+   this mechanism - by being routed through the functions here.
+
+   Of course this only helps for blocks where the RM is set more
+   than once, is set to the same value each time, *and* that value is
+   held in the same IR temporary each time.  In order to assure the
+   latter as much as possible, the IR optimiser takes care to do CSE
+   on any block with any sign of floating point activity.
  */
  static
  void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
@@ -843,10 +881,22 @@ void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
     HReg r_src;
  
     vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);
+   
+   /* Do we need to do anything? */
+   if (env->previous_rm
+       && env->previous_rm->tag == Iex_Tmp
+       && mode->tag == Iex_Tmp
+       && env->previous_rm->Iex.Tmp.tmp == mode->Iex.Tmp.tmp) {
+      /* no - setting it to what it was before.  */
+      vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
+      return;
+   }
  
-   /* Only supporting the rounding-mode bits - the rest of FPSCR is 0x0
-       - so we can set the whole register at once (faster)
-   */
+   /* No luck - we better set it, and remember what we set it to. */
+   env->previous_rm = mode;
+
+   /* Only supporting the rounding-mode bits - the rest of FPSCR is
+      0x0 - so we can set the whole register at once (faster). */
  
     // Resolve rounding mode and convert to PPC representation
     r_src = roundModeIRtoPPC( env, iselWordExpr_R(env, mode) );
@@ -862,6 +912,10 @@ void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
  }
  
  
+/*---------------------------------------------------------*/
+/*--- ISEL: vector helpers                              ---*/
+/*---------------------------------------------------------*/
+
  /*
    Generates code for AvSplat
    - takes in IRExpr* of type 8|16|32
@@ -962,7 +1016,7 @@ static HReg isNan ( ISelEnv* env, HReg vSrc )
     mnts    = newVRegV(env);
     vIsNan  = newVRegV(env); 
  
-   /* 32bit float => sign(1) | expontent(8) | mantissa(23)
+   /* 32bit float => sign(1) | exponent(8) | mantissa(23)
        nan => exponent all ones, mantissa > 0 */
  
     addInstr(env, PPCInstr_AvBinary(Pav_AND, expt, vSrc, msk_exp));
@@ -1322,8 +1376,8 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e )
  
           add_to_sp( env, 16 );
  
-         /* Restore default FPU rounding. */
-         set_FPU_rounding_default( env );
+         ///* Restore default FPU rounding. */
+         //set_FPU_rounding_default( env );
           return idst;
        }
  
@@ -1345,8 +1399,8 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, IRExpr* e )
              addInstr(env, PPCInstr_Load(8, idst, zero_r1, True/*mode64*/));
              add_to_sp( env, 16 );
  
-            /* Restore default FPU rounding. */
-            set_FPU_rounding_default( env );
+            ///* Restore default FPU rounding. */
+            //set_FPU_rounding_default( env );
              return idst;
           }
        }
@@ -2179,8 +2233,7 @@ static PPCCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
           HReg hi, lo;
           HReg tmp = newVRegI(env);
           iselInt64Expr( &hi, &lo, env, e->Iex.Unop.arg );
-         addInstr(env, mk_iMOVds_RR(tmp, lo));
-         addInstr(env, PPCInstr_Alu(Palu_OR, tmp, tmp, PPCRH_Reg(hi)));
+         addInstr(env, PPCInstr_Alu(Palu_OR, tmp, lo, PPCRH_Reg(hi)));
           addInstr(env, PPCInstr_Cmp(False/*sign*/, True/*32bit cmp*/,
                                      7/*cr*/, tmp,PPCRH_Imm(False,0)));
           return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
@@ -2501,8 +2554,8 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo,
              addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
              add_to_sp( env, 16 );
  
-            /* Restore default FPU rounding. */
-            set_FPU_rounding_default( env );
+            ///* Restore default FPU rounding. */
+            //set_FPU_rounding_default( env );
              *rHi = tHi;
              *rLo = tLo;
              return;
@@ -2681,19 +2734,6 @@ static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
        return r_dst;
     }
  
-   if (e->tag == Iex_Binop
-       && e->Iex.Binop.op == Iop_F64toF32) {
-      /* Although the result is still held in a standard FPU register,
-         we need to round it to reflect the loss of accuracy/range
-         entailed in casting it to a 32-bit float. */
-      HReg r_dst = newVRegF(env);
-      HReg r_src = iselDblExpr(env, e->Iex.Binop.arg2);
-      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
-      addInstr(env, PPCInstr_FpRSP(r_dst, r_src));
-      set_FPU_rounding_default( env );
-      return r_dst;
-   }
-
     if (e->tag == Iex_Get) {
        HReg r_dst = newVRegF(env);
        PPCAMode* am_addr = PPCAMode_IR( e->Iex.Get.offset,
@@ -2702,6 +2742,50 @@ static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
        return r_dst;
     }
  
+   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_TruncF64asF32) {
+      /* This is quite subtle.  The only way to do the relevant
+         truncation is to do a single-precision store and then a
+         single-precision load to get it back into a register.  The
+         problem is, if the data is then written to memory a second
+         time, as in
+
+            STbe(...) = TruncF64asF32(...)
+
+         then will the second truncation further alter the value?  The
+         answer is no: flds (as generated here) followed by fsts
+         (generated for the STbe) is the identity function on 32-bit
+         floats, so we are safe.
+
+         Another upshot of this is that if iselStmt can see the
+         entirety of
+
+            STbe(...) = TruncF64asF32(arg)
+
+         then it can short circuit having to deal with TruncF64asF32
+         individually; instead just compute arg into a 64-bit FP
+         register and do 'fsts' (since that itself does the
+         truncation).
+
+         We generate pretty poor code here (should be ok both for
+         32-bit and 64-bit mode); but it is expected that for the most
+         part the latter optimisation will apply and hence this code
+         will not often be used.
+      */
+      HReg      fsrc    = iselDblExpr(env, e->Iex.Unop.arg);
+      HReg      fdst    = newVRegF(env);
+      PPCAMode* zero_r1 = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
+
+      sub_from_sp( env, 16 );
+      // store as F32, hence truncating
+      addInstr(env, PPCInstr_FpLdSt( False/*store*/, 4,
+                                     fsrc, zero_r1 ));
+      // and reload.  Good huh?! (sigh)
+      addInstr(env, PPCInstr_FpLdSt( True/*load*/, 4,
+                                     fdst, zero_r1 ));
+      add_to_sp( env, 16 );
+      return fdst;
+   }
+
     vex_printf("iselFltExpr(ppc): No such tag(%u)\n", e->tag);
     ppIRExpr(e);
     vpanic("iselFltExpr_wrk(ppc)");
@@ -2805,22 +2889,39 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
        return r_dst;
     }
  
-   if (e->tag == Iex_Binop) {
+   if (e->tag == Iex_Triop) {
        PPCFpOp fpop = Pfp_INVALID;
-      switch (e->Iex.Binop.op) {
-      case Iop_AddF64:    fpop = Pfp_ADD; break;
-      case Iop_SubF64:    fpop = Pfp_SUB; break;
-      case Iop_MulF64:    fpop = Pfp_MUL; break;
-      case Iop_DivF64:    fpop = Pfp_DIV; break;
-      default: break;
+      switch (e->Iex.Triop.op) {
+         case Iop_AddF64:    fpop = Pfp_ADDD; break;
+         case Iop_SubF64:    fpop = Pfp_SUBD; break;
+         case Iop_MulF64:    fpop = Pfp_MULD; break;
+         case Iop_DivF64:    fpop = Pfp_DIVD; break;
+         case Iop_AddF64r32: fpop = Pfp_ADDS; break;
+         case Iop_SubF64r32: fpop = Pfp_SUBS; break;
+         case Iop_MulF64r32: fpop = Pfp_MULS; break;
+         case Iop_DivF64r32: fpop = Pfp_DIVS; break;
+         default: break;
        }
        if (fpop != Pfp_INVALID) {
           HReg r_dst  = newVRegF(env);
-         HReg r_srcL = iselDblExpr(env, e->Iex.Binop.arg1);
-         HReg r_srcR = iselDblExpr(env, e->Iex.Binop.arg2);
+         HReg r_srcL = iselDblExpr(env, e->Iex.Triop.arg2);
+         HReg r_srcR = iselDblExpr(env, e->Iex.Triop.arg3);
+         set_FPU_rounding_mode( env, e->Iex.Triop.arg1 );
           addInstr(env, PPCInstr_FpBinary(fpop, r_dst, r_srcL, r_srcR));
           return r_dst;
        }
+   }
+
+   if (e->tag == Iex_Binop) {
+
+      if (e->Iex.Binop.op == Iop_RoundF64toF32) {
+         HReg r_dst = newVRegF(env);
+         HReg r_src = iselDblExpr(env, e->Iex.Binop.arg2);
+         set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+         addInstr(env, PPCInstr_FpRSP(r_dst, r_src));
+         //set_FPU_rounding_default( env );
+         return r_dst;
+      }
  
        if (e->Iex.Binop.op == Iop_I64toF64) {
           if (mode64) {
@@ -2841,8 +2942,8 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
  
              add_to_sp( env, 16 );
  
-            /* Restore default FPU rounding. */
-            set_FPU_rounding_default( env );
+            ///* Restore default FPU rounding. */
+            //set_FPU_rounding_default( env );
              return fdst;
           } else {
              /* 32-bit mode */
@@ -2867,11 +2968,12 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
  
              add_to_sp( env, 16 );
  
-            /* Restore default FPU rounding. */
-            set_FPU_rounding_default( env );
+            ///* Restore default FPU rounding. */
+            //set_FPU_rounding_default( env );
              return fdst;
           }
        }
+
     }
  
     if (e->tag == Iex_Unop) {
@@ -2880,7 +2982,6 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
           case Iop_NegF64:     fpop = Pfp_NEG; break;
           case Iop_AbsF64:     fpop = Pfp_ABS; break;
           case Iop_SqrtF64:    fpop = Pfp_SQRT; break;
-         case Iop_Est8FRecip: fpop = Pfp_RES; break;
           case Iop_Est5FRSqrt: fpop = Pfp_RSQRTE; break;
           default: break;
        }
@@ -3688,7 +3789,8 @@ HInstrArray* iselBB_PPC ( IRBB* bb, VexArchInfo* archinfo_host )
     env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
  
     /* and finally ... */
-   env->hwcaps = hwcaps_host;
+   env->hwcaps      = hwcaps_host;
+   env->previous_rm = NULL;
  
     /* For each IR temporary, allocate a suitably-kinded virtual
        register. */
diff --git a/VEX/priv/host-x86/isel.c b/VEX/priv/host-x86/isel.c

index 8174dfd475ea6be847c3c772a10c57eb0ed3522f..6240ae8fa01cb4f411879b86f7874117c10edff6 100644 (file)
--- a/VEX/priv/host-x86/isel.c
+++ b/VEX/priv/host-x86/isel.c
@@ -2631,7 +2631,7 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
        }
     }
  
-   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64) {
+   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
        HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
        HReg dst = newVRegF(env);
  
diff --git a/VEX/priv/ir/irdefs.c b/VEX/priv/ir/irdefs.c

index ffda4bc8bcff34e749d3a696644d0913119d53d4..4295597e881a2e0f42878ad1e16c87054201f5ad 100644 (file)
--- a/VEX/priv/ir/irdefs.c
+++ b/VEX/priv/ir/irdefs.c
@@ -245,6 +245,10 @@ void ppIROp ( IROp op )
        case Iop_SubF64:    vex_printf("SubF64"); return;
        case Iop_MulF64:    vex_printf("MulF64"); return;
        case Iop_DivF64:    vex_printf("DivF64"); return;
+      case Iop_AddF64r32: vex_printf("AddF64r32"); return;
+      case Iop_SubF64r32: vex_printf("SubF64r32"); return;
+      case Iop_MulF64r32: vex_printf("MulF64r32"); return;
+      case Iop_DivF64r32: vex_printf("DivF64r32"); return;
  
        case Iop_ScaleF64:      vex_printf("ScaleF64"); return;
        case Iop_AtanF64:       vex_printf("AtanF64"); return;
@@ -263,8 +267,8 @@ void ppIROp ( IROp op )
        case Iop_TanF64:    vex_printf("TanF64"); return;
        case Iop_2xm1F64:   vex_printf("2xm1F64"); return;
  
-      case Iop_Est8FRecip: vex_printf("Est8FRecip"); return;
        case Iop_Est5FRSqrt: vex_printf("Est5FRSqrt"); return;
+      case Iop_TruncF64asF32: vex_printf("TruncF64asF32"); return;
  
        case Iop_CmpF64:    vex_printf("CmpF64"); return;
  
@@ -279,11 +283,11 @@ void ppIROp ( IROp op )
        case Iop_F32toF64: vex_printf("F32toF64"); return;
        case Iop_F64toF32: vex_printf("F64toF32"); return;
  
-      case Iop_RoundF64: vex_printf("RoundF64"); return;
+      case Iop_RoundF64toInt: vex_printf("RoundF64toInt"); return;
+      case Iop_RoundF64toF32: vex_printf("RoundF64toF32"); return;
  
        case Iop_ReinterpF64asI64: vex_printf("ReinterpF64asI64"); return;
        case Iop_ReinterpI64asF64: vex_printf("ReinterpI64asF64"); return;
-      case Iop_ReinterpF32asI32: vex_printf("ReinterpF32asI32"); return;
        case Iop_ReinterpI32asF32: vex_printf("ReinterpI32asF32"); return;
  
        case Iop_I32UtoFx4: vex_printf("Iop_I32UtoFx4"); return;
@@ -580,6 +584,16 @@ void ppIRExpr ( IRExpr* e )
      case Iex_Tmp:
        ppIRTemp(e->Iex.Tmp.tmp);
        break;
+    case Iex_Triop:
+      ppIROp(e->Iex.Triop.op);
+      vex_printf( "(" );
+      ppIRExpr(e->Iex.Triop.arg1);
+      vex_printf( "," );
+      ppIRExpr(e->Iex.Triop.arg2);
+      vex_printf( "," );
+      ppIRExpr(e->Iex.Triop.arg3);
+      vex_printf( ")" );
+      break;
      case Iex_Binop:
        ppIROp(e->Iex.Binop.op);
        vex_printf( "(" );
@@ -920,6 +934,16 @@ IRExpr* IRExpr_Tmp ( IRTemp tmp ) {
     e->Iex.Tmp.tmp = tmp;
     return e;
  }
+IRExpr* IRExpr_Triop  ( IROp op, IRExpr* arg1, 
+                                 IRExpr* arg2, IRExpr* arg3 ) {
+   IRExpr* e         = LibVEX_Alloc(sizeof(IRExpr)); /* fresh node */
+   e->tag            = Iex_Triop;  /* tag selects the Iex.Triop member */
+   e->Iex.Triop.op   = op;
+   e->Iex.Triop.arg1 = arg1;       /* operand pointers are stored as */
+   e->Iex.Triop.arg2 = arg2;       /* given; the operand expressions */
+   e->Iex.Triop.arg3 = arg3;       /* themselves are not copied here */
+   return e;
+}
  IRExpr* IRExpr_Binop ( IROp op, IRExpr* arg1, IRExpr* arg2 ) {
     IRExpr* e         = LibVEX_Alloc(sizeof(IRExpr));
     e->tag            = Iex_Binop;
@@ -1218,6 +1242,11 @@ IRExpr* dopyIRExpr ( IRExpr* e )
                              e->Iex.GetI.bias);
        case Iex_Tmp: 
           return IRExpr_Tmp(e->Iex.Tmp.tmp);
+      case Iex_Triop: 
+         return IRExpr_Triop(e->Iex.Triop.op,
+                             dopyIRExpr(e->Iex.Triop.arg1),
+                             dopyIRExpr(e->Iex.Triop.arg2),
+                             dopyIRExpr(e->Iex.Triop.arg3));
        case Iex_Binop: 
           return IRExpr_Binop(e->Iex.Binop.op,
                               dopyIRExpr(e->Iex.Binop.arg1),
@@ -1335,38 +1364,48 @@ IRBB* dopyIRBB ( IRBB* bb )
  /*---------------------------------------------------------------*/
  
  static
-void typeOfPrimop ( IROp op, IRType* t_dst, IRType* t_arg1, IRType* t_arg2 )
+void typeOfPrimop ( IROp op, 
+                    /*OUTs*/
+                    IRType* t_dst, 
+                    IRType* t_arg1, IRType* t_arg2, IRType* t_arg3 )
  {
-#  define UNARY(_td,_ta1)         \
+#  define UNARY(_ta1,_td)                                      \
        *t_dst = (_td); *t_arg1 = (_ta1); break
-#  define BINARY(_td,_ta1,_ta2)   \
+#  define BINARY(_ta1,_ta2,_td)                                \
       *t_dst = (_td); *t_arg1 = (_ta1); *t_arg2 = (_ta2); break
-#  define COMPARISON(_ta)         \
+#  define TERNARY(_ta1,_ta2,_ta3,_td)                          \
+     *t_dst = (_td); *t_arg1 = (_ta1);                         \
+     *t_arg2 = (_ta2); *t_arg3 = (_ta3); break
+#  define COMPARISON(_ta)                                      \
       *t_dst = Ity_I1; *t_arg1 = *t_arg2 = (_ta); break;
-#  define UNARY_COMPARISON(_ta)         \
+#  define UNARY_COMPARISON(_ta)                                \
       *t_dst = Ity_I1; *t_arg1 = (_ta); break;
  
+   /* Rounding mode values are always Ity_I32, encoded as per
+      IRRoundingMode */
+   const IRType ity_RMode = Ity_I32;
+
     *t_dst  = Ity_INVALID;
     *t_arg1 = Ity_INVALID;
     *t_arg2 = Ity_INVALID;
+   *t_arg3 = Ity_INVALID;
     switch (op) {
        case Iop_Add8: case Iop_Sub8: case Iop_Mul8: 
        case Iop_Or8:  case Iop_And8: case Iop_Xor8:
-         BINARY(Ity_I8, Ity_I8,Ity_I8);
+         BINARY(Ity_I8,Ity_I8, Ity_I8);
  
        case Iop_Add16: case Iop_Sub16: case Iop_Mul16:
        case Iop_Or16:  case Iop_And16: case Iop_Xor16:
-         BINARY(Ity_I16, Ity_I16,Ity_I16);
+         BINARY(Ity_I16,Ity_I16, Ity_I16);
  
        case Iop_CmpORD32U:
        case Iop_CmpORD32S:
        case Iop_Add32: case Iop_Sub32: case Iop_Mul32:
        case Iop_Or32:  case Iop_And32: case Iop_Xor32:
-         BINARY(Ity_I32, Ity_I32,Ity_I32);
+         BINARY(Ity_I32,Ity_I32, Ity_I32);
  
        case Iop_Add64: case Iop_Sub64: case Iop_Mul64:
        case Iop_Or64:  case Iop_And64: case Iop_Xor64:
-
        case Iop_CmpORD64U:
        case Iop_CmpORD64S:
        case Iop_Avg8Ux8: case Iop_Avg16Ux4:
@@ -1386,33 +1425,33 @@ void typeOfPrimop ( IROp op, IRType* t_dst, IRType* t_arg1, IRType* t_arg2 )
        case Iop_Sub8x8: case Iop_Sub16x4: case Iop_Sub32x2:
        case Iop_QSub8Sx8: case Iop_QSub16Sx4:
        case Iop_QSub8Ux8: case Iop_QSub16Ux4:
-         BINARY(Ity_I64, Ity_I64,Ity_I64);
+         BINARY(Ity_I64,Ity_I64, Ity_I64);
  
        case Iop_ShlN32x2: case Iop_ShlN16x4:
        case Iop_ShrN32x2: case Iop_ShrN16x4:
        case Iop_SarN32x2: case Iop_SarN16x4:
-         BINARY(Ity_I64, Ity_I64,Ity_I8);
+         BINARY(Ity_I64,Ity_I8, Ity_I64);
  
        case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
-         BINARY(Ity_I8, Ity_I8,Ity_I8);
+         BINARY(Ity_I8,Ity_I8, Ity_I8);
        case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
-         BINARY(Ity_I16, Ity_I16,Ity_I8);
+         BINARY(Ity_I16,Ity_I8, Ity_I16);
        case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
-         BINARY(Ity_I32, Ity_I32,Ity_I8);
+         BINARY(Ity_I32,Ity_I8, Ity_I32);
        case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
-         BINARY(Ity_I64, Ity_I64,Ity_I8);
+         BINARY(Ity_I64,Ity_I8, Ity_I64);
  
        case Iop_Not8: case Iop_Neg8:
-         UNARY(Ity_I8,Ity_I8);
+         UNARY(Ity_I8, Ity_I8);
        case Iop_Not16: case Iop_Neg16:
-         UNARY(Ity_I16,Ity_I16);
+         UNARY(Ity_I16, Ity_I16);
        case Iop_Not32: case Iop_Neg32:
-         UNARY(Ity_I32,Ity_I32);
+         UNARY(Ity_I32, Ity_I32);
  
        case Iop_Neg64:
        case Iop_Not64:
        case Iop_CmpNEZ32x2: case Iop_CmpNEZ16x4: case Iop_CmpNEZ8x8:
-         UNARY(Ity_I64,Ity_I64);
+         UNARY(Ity_I64, Ity_I64);
  
        case Iop_CmpEQ8: case Iop_CmpNE8:
           COMPARISON(Ity_I8);
@@ -1433,113 +1472,134 @@ void typeOfPrimop ( IROp op, IRType* t_dst, IRType* t_arg1, IRType* t_arg2 )
        case Iop_CmpNEZ64: UNARY_COMPARISON(Ity_I64);
  
        case Iop_MullU8: case Iop_MullS8:
-         BINARY(Ity_I16, Ity_I8,Ity_I8);
+         BINARY(Ity_I8,Ity_I8, Ity_I16);
        case Iop_MullU16: case Iop_MullS16:
-         BINARY(Ity_I32, Ity_I16,Ity_I16);
+         BINARY(Ity_I16,Ity_I16, Ity_I32);
        case Iop_MullU32: case Iop_MullS32:
-         BINARY(Ity_I64, Ity_I32,Ity_I32);
+         BINARY(Ity_I32,Ity_I32, Ity_I64);
        case Iop_MullU64: case Iop_MullS64:
-         BINARY(Ity_I128, Ity_I64,Ity_I64);
+         BINARY(Ity_I64,Ity_I64, Ity_I128);
  
        case Iop_Clz32: case Iop_Ctz32:
-         UNARY(Ity_I32,Ity_I32);
+         UNARY(Ity_I32, Ity_I32);
  
        case Iop_Clz64: case Iop_Ctz64:
-         UNARY(Ity_I64,Ity_I64);
+         UNARY(Ity_I64, Ity_I64);
  
        case Iop_DivU32: case Iop_DivS32:
-         BINARY(Ity_I32, Ity_I32,Ity_I32);
+         BINARY(Ity_I32,Ity_I32, Ity_I32);
  
        case Iop_DivU64: case Iop_DivS64:
-         BINARY(Ity_I64, Ity_I64, Ity_I64);
+         BINARY(Ity_I64,Ity_I64, Ity_I64);
  
        case Iop_DivModU64to32: case Iop_DivModS64to32:
-         BINARY(Ity_I64, Ity_I64,Ity_I32);
+         BINARY(Ity_I64,Ity_I32, Ity_I64);
  
        case Iop_DivModU128to64: case Iop_DivModS128to64:
-         BINARY(Ity_I128, Ity_I128,Ity_I64);
+         BINARY(Ity_I128,Ity_I64, Ity_I128);
  
        case Iop_16HIto8: case Iop_16to8:
-         UNARY(Ity_I8,Ity_I16);
+         UNARY(Ity_I16, Ity_I8);
        case Iop_8HLto16:
-         BINARY(Ity_I16, Ity_I8,Ity_I8);
+         BINARY(Ity_I8,Ity_I8, Ity_I16);
  
        case Iop_32HIto16: case Iop_32to16:
-         UNARY(Ity_I16,Ity_I32);
+         UNARY(Ity_I32, Ity_I16);
        case Iop_16HLto32:
-         BINARY(Ity_I32, Ity_I16,Ity_I16);
+         BINARY(Ity_I16,Ity_I16, Ity_I32);
  
        case Iop_64HIto32: case Iop_64to32:
-         UNARY(Ity_I32, Ity_I64);
+         UNARY(Ity_I64, Ity_I32);
        case Iop_32HLto64:
-         BINARY(Ity_I64, Ity_I32,Ity_I32);
+         BINARY(Ity_I32,Ity_I32, Ity_I64);
  
        case Iop_128HIto64: case Iop_128to64:
-         UNARY(Ity_I64, Ity_I128);
+         UNARY(Ity_I128, Ity_I64);
        case Iop_64HLto128:
-         BINARY(Ity_I128, Ity_I64,Ity_I64);
+         BINARY(Ity_I64,Ity_I64, Ity_I128);
  
-      case Iop_Not1:   UNARY(Ity_I1,Ity_I1);
-      case Iop_1Uto8:  UNARY(Ity_I8,Ity_I1);
-      case Iop_1Sto8:  UNARY(Ity_I8,Ity_I1);
-      case Iop_1Sto16: UNARY(Ity_I16,Ity_I1);
-      case Iop_1Uto32: case Iop_1Sto32: UNARY(Ity_I32,Ity_I1);
-      case Iop_1Sto64: case Iop_1Uto64: UNARY(Ity_I64,Ity_I1);
-      case Iop_32to1:  UNARY(Ity_I1,Ity_I32);
-      case Iop_64to1:  UNARY(Ity_I1,Ity_I64);
+      case Iop_Not1:   UNARY(Ity_I1, Ity_I1);
+      case Iop_1Uto8:  UNARY(Ity_I1, Ity_I8);
+      case Iop_1Sto8:  UNARY(Ity_I1, Ity_I8);
+      case Iop_1Sto16: UNARY(Ity_I1, Ity_I16);
+      case Iop_1Uto32: case Iop_1Sto32: UNARY(Ity_I1, Ity_I32);
+      case Iop_1Sto64: case Iop_1Uto64: UNARY(Ity_I1, Ity_I64);
+      case Iop_32to1:  UNARY(Ity_I32, Ity_I1);
+      case Iop_64to1:  UNARY(Ity_I64, Ity_I1);
  
        case Iop_8Uto32: case Iop_8Sto32:
-         UNARY(Ity_I32,Ity_I8);
+         UNARY(Ity_I8, Ity_I32);
  
        case Iop_8Uto16: case Iop_8Sto16:
-         UNARY(Ity_I16,Ity_I8);
+         UNARY(Ity_I8, Ity_I16);
  
        case Iop_16Uto32: case Iop_16Sto32: 
-         UNARY(Ity_I32,Ity_I16);
+         UNARY(Ity_I16, Ity_I32);
  
        case Iop_32Sto64: case Iop_32Uto64:
-         UNARY(Ity_I64,Ity_I32);
+         UNARY(Ity_I32, Ity_I64);
  
        case Iop_8Uto64: case Iop_8Sto64:
-         UNARY(Ity_I64,Ity_I8);
+         UNARY(Ity_I8, Ity_I64);
  
        case Iop_16Uto64: case Iop_16Sto64:
-         UNARY(Ity_I64,Ity_I16);
-      case Iop_64to16:
           UNARY(Ity_I16, Ity_I64);
+      case Iop_64to16:
+         UNARY(Ity_I64, Ity_I16);
  
-      case Iop_32to8: UNARY(Ity_I8,Ity_I32);
-      case Iop_64to8: UNARY(Ity_I8,Ity_I64);
+      case Iop_32to8: UNARY(Ity_I32, Ity_I8);
+      case Iop_64to8: UNARY(Ity_I64, Ity_I8);
+
+      case Iop_AddF64:    case Iop_SubF64: 
+      case Iop_MulF64:    case Iop_DivF64:
+      case Iop_AddF64r32: case Iop_SubF64r32: 
+      case Iop_MulF64r32: case Iop_DivF64r32:
+         TERNARY(ity_RMode,Ity_F64,Ity_F64, Ity_F64);
+
+      case Iop_NegF64: case Iop_AbsF64: 
+         UNARY(Ity_F64, Ity_F64);
+
+      case Iop_SqrtF64:
+      case Iop_SqrtF64r32:
+         BINARY(ity_RMode,Ity_F64, Ity_F64);
  
-      case Iop_ScaleF64: case Iop_PRemF64: case Iop_PRem1F64:
-      case Iop_AtanF64: case Iop_Yl2xF64:  case Iop_Yl2xp1F64: 
-      case Iop_AddF64: case Iop_SubF64: case Iop_MulF64: case Iop_DivF64:
-         BINARY(Ity_F64,Ity_F64,Ity_F64);
-      case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
        case Iop_CmpF64:
-         BINARY(Ity_I32,Ity_F64,Ity_F64);
-      case Iop_NegF64: case Iop_AbsF64: case Iop_SqrtF64:
-      case Iop_SinF64: case Iop_CosF64: case Iop_TanF64: case Iop_2xm1F64:
-      case Iop_Est8FRecip: case Iop_Est5FRSqrt:
-         UNARY(Ity_F64,Ity_F64);
+         BINARY(Ity_F64,Ity_F64, Ity_I32);
  
-      case Iop_ReinterpI64asF64: UNARY(Ity_F64, Ity_I64);
-      case Iop_ReinterpF64asI64: UNARY(Ity_I64, Ity_F64);
-      case Iop_ReinterpI32asF32: UNARY(Ity_F32, Ity_I32);
-      case Iop_ReinterpF32asI32: UNARY(Ity_I32, Ity_F32);
+      case Iop_F64toI16: BINARY(ity_RMode,Ity_F64, Ity_I16);
+      case Iop_F64toI32: BINARY(ity_RMode,Ity_F64, Ity_I32);
+      case Iop_F64toI64: BINARY(ity_RMode,Ity_F64, Ity_I64);
  
-      case Iop_F64toI16: BINARY(Ity_I16, Ity_I32,Ity_F64);
-      case Iop_F64toI32: BINARY(Ity_I32, Ity_I32,Ity_F64);
-      case Iop_F64toI64: BINARY(Ity_I64, Ity_I32,Ity_F64);
+      case Iop_I16toF64: UNARY(Ity_I16, Ity_F64);
+      case Iop_I32toF64: UNARY(Ity_I32, Ity_F64);
+      case Iop_I64toF64: BINARY(ity_RMode,Ity_I64, Ity_F64);
  
-      case Iop_I16toF64: UNARY(Ity_F64, Ity_I16);
-      case Iop_I32toF64: UNARY(Ity_F64, Ity_I32);
-      case Iop_I64toF64: BINARY(Ity_F64, Ity_I32,Ity_I64);
+      case Iop_F32toF64: UNARY(Ity_F32, Ity_F64);
+      case Iop_F64toF32: BINARY(ity_RMode,Ity_F64, Ity_F32);
  
-      case Iop_F32toF64: UNARY(Ity_F64, Ity_F32);
-      case Iop_F64toF32: BINARY(Ity_F32, Ity_I32,Ity_F64);
+      case Iop_ReinterpI64asF64: UNARY(Ity_I64, Ity_F64);
+      case Iop_ReinterpF64asI64: UNARY(Ity_F64, Ity_I64);
+      case Iop_ReinterpI32asF32: UNARY(Ity_I32, Ity_F32);
  
-      case Iop_RoundF64: BINARY(Ity_F64, Ity_I32,Ity_F64);
+      case Iop_AtanF64: case Iop_Yl2xF64:  case Iop_Yl2xp1F64: 
+      case Iop_ScaleF64: case Iop_PRemF64: case Iop_PRem1F64:
+         TERNARY(ity_RMode,Ity_F64,Ity_F64, Ity_F64);
+
+      case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
+         TERNARY(ity_RMode,Ity_F64,Ity_F64, Ity_I32);
+
+      case Iop_SinF64: case Iop_CosF64: case Iop_TanF64: 
+      case Iop_2xm1F64:
+      case Iop_RoundF64toInt: BINARY(ity_RMode,Ity_F64, Ity_F64);
+
+      case Iop_Est5FRSqrt:
+         UNARY(Ity_F64, Ity_F64);
+      case Iop_RoundF64toF32:
+         BINARY(ity_RMode,Ity_F64, Ity_F64);
+      case Iop_CalcFPRF:
+         UNARY(Ity_F64, Ity_I32);
+      case Iop_TruncF64asF32:
+         UNARY(Ity_F64, Ity_F32);
  
        case Iop_I32UtoFx4:
        case Iop_I32StoFx4:
@@ -1551,19 +1611,19 @@ void typeOfPrimop ( IROp op, IRType* t_dst, IRType* t_arg1, IRType* t_arg2 )
        case Iop_RoundF32x4_RZ:
           UNARY(Ity_V128, Ity_V128);
  
-      case Iop_64HLtoV128: BINARY(Ity_V128, Ity_I64,Ity_I64);
+      case Iop_64HLtoV128: BINARY(Ity_I64,Ity_I64, Ity_V128);
        case Iop_V128to64: case Iop_V128HIto64: 
-         UNARY(Ity_I64, Ity_V128);
+         UNARY(Ity_V128, Ity_I64);
  
-      case Iop_V128to32:    UNARY(Ity_I32, Ity_V128);
-      case Iop_32UtoV128:   UNARY(Ity_V128, Ity_I32);
-      case Iop_64UtoV128:   UNARY(Ity_V128, Ity_I64);
-      case Iop_SetV128lo32: BINARY(Ity_V128, Ity_V128,Ity_I32);
-      case Iop_SetV128lo64: BINARY(Ity_V128, Ity_V128,Ity_I64);
+      case Iop_V128to32:    UNARY(Ity_V128, Ity_I32);
+      case Iop_32UtoV128:   UNARY(Ity_I32, Ity_V128);
+      case Iop_64UtoV128:   UNARY(Ity_I64, Ity_V128);
+      case Iop_SetV128lo32: BINARY(Ity_V128,Ity_I32, Ity_V128);
+      case Iop_SetV128lo64: BINARY(Ity_V128,Ity_I64, Ity_V128);
  
-      case Iop_Dup8x16: UNARY(Ity_V128, Ity_I8);
-      case Iop_Dup16x8: UNARY(Ity_V128, Ity_I16);
-      case Iop_Dup32x4: UNARY(Ity_V128, Ity_I32);
+      case Iop_Dup8x16: UNARY(Ity_I8, Ity_V128);
+      case Iop_Dup16x8: UNARY(Ity_I16, Ity_V128);
+      case Iop_Dup32x4: UNARY(Ity_I32, Ity_V128);
  
        case Iop_CmpEQ32Fx4: case Iop_CmpLT32Fx4:
        case Iop_CmpEQ64Fx2: case Iop_CmpLT64Fx2:
@@ -1621,7 +1681,7 @@ void typeOfPrimop ( IROp op, IRType* t_dst, IRType* t_arg1, IRType* t_arg2 )
        case Iop_InterleaveLO8x16: case Iop_InterleaveLO16x8: 
        case Iop_InterleaveLO32x4: case Iop_InterleaveLO64x2:
        case Iop_Perm8x16:
-         BINARY(Ity_V128, Ity_V128,Ity_V128);
+         BINARY(Ity_V128,Ity_V128, Ity_V128);
  
        case Iop_NotV128:
        case Iop_Recip32Fx4: case Iop_Recip32F0x4:
@@ -1635,10 +1695,12 @@ void typeOfPrimop ( IROp op, IRType* t_dst, IRType* t_arg1, IRType* t_arg2 )
           UNARY(Ity_V128, Ity_V128);
  
        case Iop_ShlV128: case Iop_ShrV128:
-      case Iop_ShlN8x16: case Iop_ShlN16x8: case Iop_ShlN32x4: case Iop_ShlN64x2:
-      case Iop_ShrN8x16: case Iop_ShrN16x8: case Iop_ShrN32x4: case Iop_ShrN64x2:
+      case Iop_ShlN8x16: case Iop_ShlN16x8: 
+      case Iop_ShlN32x4: case Iop_ShlN64x2:
+      case Iop_ShrN8x16: case Iop_ShrN16x8: 
+      case Iop_ShrN32x4: case Iop_ShrN64x2:
        case Iop_SarN8x16: case Iop_SarN16x8: case Iop_SarN32x4:
-         BINARY(Ity_V128, Ity_V128, Ity_I8);
+         BINARY(Ity_V128,Ity_I8, Ity_V128);
  
        default:
           ppIROp(op);
@@ -1646,6 +1708,7 @@ void typeOfPrimop ( IROp op, IRType* t_dst, IRType* t_arg1, IRType* t_arg2 )
     }
  #  undef UNARY
  #  undef BINARY
+#  undef TERNARY
  #  undef COMPARISON
  #  undef UNARY_COMPARISON
  }
@@ -1730,7 +1793,7 @@ IRType typeOfIRConst ( IRConst* con )
  
  IRType typeOfIRExpr ( IRTypeEnv* tyenv, IRExpr* e )
  {
-   IRType t_dst, t_arg1, t_arg2;
+   IRType t_dst, t_arg1, t_arg2, t_arg3;
   start:
     switch (e->tag) {
        case Iex_Load:
@@ -1743,11 +1806,14 @@ IRType typeOfIRExpr ( IRTypeEnv* tyenv, IRExpr* e )
           return typeOfIRTemp(tyenv, e->Iex.Tmp.tmp);
        case Iex_Const:
           return typeOfIRConst(e->Iex.Const.con);
+      case Iex_Triop:
+         typeOfPrimop(e->Iex.Triop.op, &t_dst, &t_arg1, &t_arg2, &t_arg3);
+         return t_dst;
        case Iex_Binop:
-         typeOfPrimop(e->Iex.Binop.op, &t_dst, &t_arg1, &t_arg2);
+         typeOfPrimop(e->Iex.Binop.op, &t_dst, &t_arg1, &t_arg2, &t_arg3);
           return t_dst;
        case Iex_Unop:
-         typeOfPrimop(e->Iex.Unop.op, &t_dst, &t_arg1, &t_arg2);
+         typeOfPrimop(e->Iex.Unop.op, &t_dst, &t_arg1, &t_arg2, &t_arg3);
           return t_dst;
        case Iex_CCall:
           return e->Iex.CCall.retty;
@@ -1817,6 +1883,10 @@ Bool isFlatIRStmt ( IRStmt* st )
              case Iex_Get:    return True;
              case Iex_GetI:   return isIRAtom(e->Iex.GetI.ix);
              case Iex_Tmp:    return True;
+            case Iex_Triop:  return toBool(
+                                    isIRAtom(e->Iex.Triop.arg1) 
+                                    && isIRAtom(e->Iex.Triop.arg2)
+                                    && isIRAtom(e->Iex.Triop.arg3));
              case Iex_Binop:  return toBool(
                                      isIRAtom(e->Iex.Binop.arg1) 
                                      && isIRAtom(e->Iex.Binop.arg2));
@@ -1955,6 +2025,11 @@ void useBeforeDef_Expr ( IRBB* bb, IRStmt* stmt, IRExpr* expr, Int* def_counts )
        case Iex_Tmp:
           useBeforeDef_Temp(bb,stmt,expr->Iex.Tmp.tmp,def_counts);
           break;
+      case Iex_Triop:
+         useBeforeDef_Expr(bb,stmt,expr->Iex.Triop.arg1,def_counts);
+         useBeforeDef_Expr(bb,stmt,expr->Iex.Triop.arg2,def_counts);
+         useBeforeDef_Expr(bb,stmt,expr->Iex.Triop.arg3,def_counts);
+         break;
        case Iex_Binop:
           useBeforeDef_Expr(bb,stmt,expr->Iex.Binop.arg1,def_counts);
           useBeforeDef_Expr(bb,stmt,expr->Iex.Binop.arg2,def_counts);
@@ -2028,7 +2103,7 @@ static
  void tcExpr ( IRBB* bb, IRStmt* stmt, IRExpr* expr, IRType gWordTy )
  {
     Int        i;
-   IRType     t_dst, t_arg1, t_arg2;
+   IRType     t_dst, t_arg1, t_arg2, t_arg3;
     IRTypeEnv* tyenv = bb->tyenv;
     switch (expr->tag) {
        case Iex_Get:
@@ -2041,12 +2116,56 @@ void tcExpr ( IRBB* bb, IRStmt* stmt, IRExpr* expr, IRType gWordTy )
           if (!saneIRArray(expr->Iex.GetI.descr))
              sanityCheckFail(bb,stmt,"IRExpr.GetI.descr: invalid descr");
           break;
+      case Iex_Triop: {
+         IRType ttarg1, ttarg2, ttarg3;
+         tcExpr(bb,stmt, expr->Iex.Triop.arg1, gWordTy );
+         tcExpr(bb,stmt, expr->Iex.Triop.arg2, gWordTy );
+         tcExpr(bb,stmt, expr->Iex.Triop.arg3, gWordTy );
+         typeOfPrimop(expr->Iex.Triop.op, &t_dst, &t_arg1, &t_arg2, &t_arg3);
+         if (t_arg1 == Ity_INVALID || t_arg2 == Ity_INVALID 
+                                   || t_arg3 == Ity_INVALID) {
+            vex_printf(" op name: " );
+            ppIROp(expr->Iex.Triop.op);
+            vex_printf("\n");
+            sanityCheckFail(bb,stmt,
+               "Iex.Triop: wrong arity op\n"
+               "... name of op precedes BB printout\n");
+         }
+         ttarg1 = typeOfIRExpr(tyenv, expr->Iex.Triop.arg1);
+         ttarg2 = typeOfIRExpr(tyenv, expr->Iex.Triop.arg2);
+         ttarg3 = typeOfIRExpr(tyenv, expr->Iex.Triop.arg3);
+         if (t_arg1 != ttarg1 || t_arg2 != ttarg2 || t_arg3 != ttarg3) {
+            vex_printf(" op name: ");
+            ppIROp(expr->Iex.Triop.op);
+            vex_printf("\n");
+            vex_printf(" op type is (");
+            ppIRType(t_arg1);
+            vex_printf(",");
+            ppIRType(t_arg2);
+            vex_printf(",");
+            ppIRType(t_arg3);
+            vex_printf(") -> ");
+            ppIRType (t_dst);
+            vex_printf("\narg tys are (");
+            ppIRType(ttarg1);
+            vex_printf(",");
+            ppIRType(ttarg2);
+            vex_printf(",");
+            ppIRType(ttarg3);
+            vex_printf(")\n");
+            sanityCheckFail(bb,stmt,
+               "Iex.Triop: arg tys don't match op tys\n"
+               "... additional details precede BB printout\n");
+         }
+         break;
+      }
        case Iex_Binop: {
           IRType ttarg1, ttarg2;
           tcExpr(bb,stmt, expr->Iex.Binop.arg1, gWordTy );
           tcExpr(bb,stmt, expr->Iex.Binop.arg2, gWordTy );
-         typeOfPrimop(expr->Iex.Binop.op, &t_dst, &t_arg1, &t_arg2);
-         if (t_arg1 == Ity_INVALID || t_arg2 == Ity_INVALID) {
+         typeOfPrimop(expr->Iex.Binop.op, &t_dst, &t_arg1, &t_arg2, &t_arg3);
+         if (t_arg1 == Ity_INVALID || t_arg2 == Ity_INVALID 
+                                   || t_arg3 != Ity_INVALID) {
              vex_printf(" op name: " );
              ppIROp(expr->Iex.Binop.op);
              vex_printf("\n");
@@ -2079,8 +2198,9 @@ void tcExpr ( IRBB* bb, IRStmt* stmt, IRExpr* expr, IRType gWordTy )
        }
        case Iex_Unop:
           tcExpr(bb,stmt, expr->Iex.Unop.arg, gWordTy );
-         typeOfPrimop(expr->Iex.Binop.op, &t_dst, &t_arg1, &t_arg2);
-         if (t_arg1 == Ity_INVALID || t_arg2 != Ity_INVALID)
+         typeOfPrimop(expr->Iex.Unop.op, &t_dst, &t_arg1, &t_arg2, &t_arg3);
+         if (t_arg1 == Ity_INVALID || t_arg2 != Ity_INVALID /* unary op: */
+                                   || t_arg3 != Ity_INVALID) /* arg1 only */
              sanityCheckFail(bb,stmt,"Iex.Unop: wrong arity op");
           if (t_arg1 != typeOfIRExpr(tyenv, expr->Iex.Unop.arg))
              sanityCheckFail(bb,stmt,"Iex.Unop: arg ty doesn't match op ty");
diff --git a/VEX/priv/ir/iropt.c b/VEX/priv/ir/iropt.c

index 3d04fd2f98c36520a7d9f305037325be1eeea0f4..97e774a919d361baa6e9b8ea2695ca06e9cf9c01 100644 (file)
--- a/VEX/priv/ir/iropt.c
+++ b/VEX/priv/ir/iropt.c
@@ -121,6 +121,7 @@
     Level 2: the following sequence
        * Flatten into atomic form.
        * Cheap transformations.
+      * If block contains any floating or vector types, CSE.
        * If block contains GetI or PutI, Expensive transformations.
        * Try unrolling loops.  Three possible outcomes:
          - No effect: do nothing more.
@@ -296,6 +297,15 @@ static IRExpr* flatten_Expr ( IRBB* bb, IRExpr* ex )
              IRStmt_Tmp(t1, ex));
           return IRExpr_Tmp(t1);
  
+      case Iex_Triop:
+         t1 = newIRTemp(bb->tyenv, ty);
+         addStmtToIRBB(bb, IRStmt_Tmp(t1, 
+            IRExpr_Triop(ex->Iex.Triop.op,
+                         flatten_Expr(bb, ex->Iex.Triop.arg1),
+                         flatten_Expr(bb, ex->Iex.Triop.arg2),
+                         flatten_Expr(bb, ex->Iex.Triop.arg3))));
+         return IRExpr_Tmp(t1);
+
        case Iex_Binop:
           t1 = newIRTemp(bb->tyenv, ty);
           addStmtToIRBB(bb, IRStmt_Tmp(t1, 
@@ -1406,6 +1416,13 @@ static IRExpr* fold_Expr ( IRExpr* e )
              e2 = IRExpr_Const(IRConst_U32(0));
           } else
  
+         /* And32(0,x) ==> 0 */
+         if (e->Iex.Binop.op == Iop_And32
+             && e->Iex.Binop.arg1->tag == Iex_Const
+             && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
+            e2 = IRExpr_Const(IRConst_U32(0));
+         } else
+
           /* Or32(0,x) ==> x */
           if (e->Iex.Binop.op == Iop_Or32
               && e->Iex.Binop.arg1->tag == Iex_Const
@@ -1413,6 +1430,13 @@ static IRExpr* fold_Expr ( IRExpr* e )
              e2 = e->Iex.Binop.arg2;
           } else
  
+         /* Or64(0,x) ==> x */
+         if (e->Iex.Binop.op == Iop_Or64
+             && e->Iex.Binop.arg1->tag == Iex_Const
+             && e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 == 0) {
+            e2 = e->Iex.Binop.arg2;
+         } else
+
           /* Or8/16/32/64(t,t) ==> t, for some IRTemp t */
           /* And8/16/32/64(t,t) ==> t, for some IRTemp t */
           if (   (e->Iex.Binop.op == Iop_And64
@@ -1500,6 +1524,17 @@ static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
              ex->Iex.GetI.bias
           );
  
+      case Iex_Triop:
+         vassert(isIRAtom(ex->Iex.Triop.arg1));
+         vassert(isIRAtom(ex->Iex.Triop.arg2));
+         vassert(isIRAtom(ex->Iex.Triop.arg3));
+         return IRExpr_Triop(
+                   ex->Iex.Triop.op,
+                   subst_Expr(env, ex->Iex.Triop.arg1),
+                   subst_Expr(env, ex->Iex.Triop.arg2),
+                   subst_Expr(env, ex->Iex.Triop.arg3)
+                );
+
        case Iex_Binop:
           vassert(isIRAtom(ex->Iex.Binop.arg1));
           vassert(isIRAtom(ex->Iex.Binop.arg2));
@@ -1779,6 +1814,11 @@ static void addUses_Expr ( Bool* set, IRExpr* e )
        case Iex_Load:
           addUses_Expr(set, e->Iex.Load.addr);
           return;
+      case Iex_Triop:
+         addUses_Expr(set, e->Iex.Triop.arg1);
+         addUses_Expr(set, e->Iex.Triop.arg2);
+         addUses_Expr(set, e->Iex.Triop.arg3);
+         return;
        case Iex_Binop:
           addUses_Expr(set, e->Iex.Binop.arg1);
           addUses_Expr(set, e->Iex.Binop.arg2);
@@ -2199,11 +2239,10 @@ void do_cse_BB ( IRBB* bb )
  
     vassert(sizeof(IRTemp) <= sizeof(HWord));
  
-   //ppIRBB(bb);
-   //vex_printf("\n\n");
+   if (0) { ppIRBB(bb); vex_printf("\n\n"); }
  
     /* Iterate forwards over the stmts.  
-      On seeing "t = E", where E is one of the 3 AvailExpr forms:
+      On seeing "t = E", where E is one of the 5 AvailExpr forms:
           let E' = apply tenv substitution to E
           search aenv for E'
              if a mapping E' -> q is found, 
@@ -2253,10 +2292,11 @@ void do_cse_BB ( IRBB* bb )
        }
     }
  
-   //ppIRBB(bb);
-   //sanityCheckIRBB(bb, Ity_I32);
-   //vex_printf("\n\n");
-      
+   /*
+   ppIRBB(bb);
+   sanityCheckIRBB(bb, Ity_I32);
+   vex_printf("\n\n");
+   */
  }
  
  
@@ -2883,6 +2923,11 @@ static void deltaIRExpr ( IRExpr* e, Int delta )
        case Iex_GetI:
           deltaIRExpr(e->Iex.GetI.ix, delta);
           break;
+      case Iex_Triop:
+         deltaIRExpr(e->Iex.Triop.arg1, delta);
+         deltaIRExpr(e->Iex.Triop.arg2, delta);
+         deltaIRExpr(e->Iex.Triop.arg3, delta);
+         break;
        case Iex_Binop:
           deltaIRExpr(e->Iex.Binop.arg1, delta);
           deltaIRExpr(e->Iex.Binop.arg2, delta);
@@ -3248,6 +3293,11 @@ static void setHints_Expr (Bool* doesLoad, Bool* doesGet, IRExpr* e )
           setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.expr0);
           setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.exprX);
           return;
+      case Iex_Triop:
+         setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg1);
+         setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg2);
+         setHints_Expr(doesLoad, doesGet, e->Iex.Triop.arg3);
+         return;
        case Iex_Binop:
           setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg1);
           setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg2);
@@ -3310,6 +3360,12 @@ static void aoccCount_Expr ( UShort* uses, IRExpr* e )
           aoccCount_Expr(uses, e->Iex.Mux0X.exprX);
           return;
  
+      case Iex_Triop: 
+         aoccCount_Expr(uses, e->Iex.Triop.arg1);
+         aoccCount_Expr(uses, e->Iex.Triop.arg2);
+         aoccCount_Expr(uses, e->Iex.Triop.arg3);
+         return;
+
        case Iex_Binop: 
           aoccCount_Expr(uses, e->Iex.Binop.arg1);
           aoccCount_Expr(uses, e->Iex.Binop.arg2);
@@ -3439,6 +3495,13 @@ static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
                     atbSubst_Expr(env, e->Iex.Mux0X.expr0),
                     atbSubst_Expr(env, e->Iex.Mux0X.exprX)
                  );
+      case Iex_Triop:
+         return IRExpr_Triop(
+                   e->Iex.Triop.op,
+                   atbSubst_Expr(env, e->Iex.Triop.arg1),
+                   atbSubst_Expr(env, e->Iex.Triop.arg2),
+                   atbSubst_Expr(env, e->Iex.Triop.arg3)
+                );
        case Iex_Binop:
           return IRExpr_Binop(
                     e->Iex.Binop.op,
@@ -3626,7 +3689,7 @@ static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
           /* optional extra: dump dead bindings as we find them.
              Removes the need for a prior dead-code removal pass. */
           if (uses[st->Ist.Tmp.tmp] == 0) {
-          //vex_printf("DEAD binding\n");
+           if (0) vex_printf("DEAD binding\n");
              continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
           }
           vassert(uses[st->Ist.Tmp.tmp] == 1);
@@ -3726,7 +3789,7 @@ static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
  /*--- iropt main                                              ---*/
  /*---------------------------------------------------------------*/
  
-static Bool iropt_verbose = False; //True;
+static Bool iropt_verbose = False; /* True; */
  
  
  /* Do a simple cleanup pass on bb.  This is: redundant Get removal,
@@ -3792,17 +3855,23 @@ IRBB* expensive_transformations( IRBB* bb )
  }
  
  
-/* Scan a flattened BB to see if it has any GetI or PutIs in it.  Used
-   as a heuristic hack to see if iropt needs to do expensive
-   optimisations (CSE, PutI -> GetI forwarding, redundant PutI
-   elimination) to improve code containing GetI or PutI.  */
+/* Scan a flattened BB to look for signs that more expensive
+   optimisations might be useful:
+   - find out if there are any GetIs and PutIs
+   - find out if there are any floating or vector-typed temporaries
+*/
  
-static Bool hasGetIorPutI ( IRBB* bb )
+static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
+                                 /*OUT*/Bool* hasVorFtemps,
+                                 IRBB* bb )
  {
     Int i, j;
     IRStmt* st;
     IRDirty* d;
  
+   *hasGetIorPutI = False;
+   *hasVorFtemps  = False;
+
     for (i = 0; i < bb->stmts_used; i++) {
        st = bb->stmts[i];
        switch (st->tag) {
@@ -3810,10 +3879,21 @@ static Bool hasGetIorPutI ( IRBB* bb )
              vassert(isIRAtom(st->Ist.AbiHint.base));
              break;
           case Ist_PutI: 
-            return True;
+            *hasGetIorPutI = True;
+            break;
           case Ist_Tmp:  
              if (st->Ist.Tmp.data->tag == Iex_GetI)
-               return True;
+               *hasGetIorPutI = True;
+            switch (typeOfIRTemp(bb->tyenv, st->Ist.Tmp.tmp)) {
+               case Ity_I1: case Ity_I8: case Ity_I16: 
+               case Ity_I32: case Ity_I64: case Ity_I128: 
+                  break;
+               case Ity_F32: case Ity_F64: case Ity_V128: 
+                  *hasVorFtemps = True;
+                  break;
+               default: 
+                  goto bad;
+            }
              break;
           case Ist_Put:
              vassert(isIRAtom(st->Ist.Put.data));
@@ -3838,13 +3918,11 @@ static Bool hasGetIorPutI ( IRBB* bb )
              vassert(isIRAtom(st->Ist.Exit.guard));
              break;
           default: 
+         bad:
              ppIRStmt(st);
              vpanic("hasGetIorPutI");
        }
-
     }
-   return False;
-
  }
  
  
@@ -3867,7 +3945,7 @@ IRBB* do_iropt_BB ( IRBB* bb0,
     static Int n_total     = 0;
     static Int n_expensive = 0;
  
-   Bool do_expensive;
+   Bool hasGetIorPutI, hasVorFtemps;
     IRBB *bb, *bb2;
  
     n_total++;
@@ -3894,8 +3972,20 @@ IRBB* do_iropt_BB ( IRBB* bb0,
     bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
  
     if (vex_control.iropt_level > 1) {
-      do_expensive = hasGetIorPutI(bb);
-      if (do_expensive) {
+
+      /* Peer at what we have, to decide how much more effort to throw
+         at it. */
+      considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );
+
+      if (hasVorFtemps) {
+         /* If any evidence of FP or Vector activity, CSE, as that
+            tends to mop up all manner of lardy code to do with
+            rounding modes. */
+         do_cse_BB( bb );
+         do_deadcode_BB( bb );
+      }
+
+      if (hasGetIorPutI) {
           n_expensive++;
           if (DEBUG_IROPT)
              vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
@@ -3909,7 +3999,7 @@ IRBB* do_iropt_BB ( IRBB* bb0,
        bb2 = maybe_loop_unroll_BB( bb, guest_addr );
        if (bb2) {
           bb = cheap_transformations( bb2, specHelper, preciseMemExnsFn );
-         if (do_expensive) {
+         if (hasGetIorPutI) {
              bb = expensive_transformations( bb );
              bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
           } else {
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h

index 2a5d06e20fbd5f929d9690edc321c46f2c3bbaa5..aa879a571589b178fa756079727249f6640f3f98 100644 (file)
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -333,35 +333,24 @@ typedef
  
        /* ------ Floating point.  We try to be IEEE754 compliant. ------ */
  
-      /* Binary operations mandated by IEEE754. */
-      Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64, /* Iop_RemF64, */
+      /* --- Simple stuff as mandated by 754. --- */
  
-      /* Binary ops supported by IA32 but not mandated by 754. */
-      Iop_AtanF64,       /* FPATAN,  arctan(arg1/arg2)       */
-      Iop_Yl2xF64,       /* FYL2X,   arg1 * log2(arg2)       */
-      Iop_Yl2xp1F64,     /* FYL2XP1, arg1 * log2(arg2+1.0)   */
-      Iop_PRemF64,       /* FPREM,   non-IEEE remainder(arg1/arg2)    */
-      Iop_PRemC3210F64,  /* C3210 flags resulting from FPREM, :: I32 */
-      Iop_PRem1F64,      /* FPREM1,  IEEE remainder(arg1/arg2)    */
-      Iop_PRem1C3210F64, /* C3210 flags resulting from FPREM1, :: I32 */
-      Iop_ScaleF64,      /* FSCALE,  arg1 * (2^RoundTowardsZero(arg2)) */
-      /* Note that on x86 guest, PRem1{C3210} has the same behaviour
-         as the IEEE mandated RemF64, except it is limited in the
-         range of its operand.  Hence the partialness. */
+      /* Binary operations, with rounding. */
+      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */ 
+      Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64,
  
-      /* Unary operations mandated by IEEE754. */
-      Iop_NegF64, Iop_SqrtF64, 
+      /* Variants of the above which produce a 64-bit result but which
+         round their result to an IEEE float range first. */
+      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */ 
+      Iop_AddF64r32, Iop_SubF64r32, Iop_MulF64r32, Iop_DivF64r32, 
  
-      /* Unary ops supported by IA32 but not mandated by 754. */
-      Iop_AbsF64,    /* FABS */
-      Iop_SinF64,    /* FSIN */
-      Iop_CosF64,    /* FCOS */
-      Iop_TanF64,    /* FTAN */
-      Iop_2xm1F64,   /* (2^arg - 1.0) */
+      /* Unary operations, without rounding. */
+      /* :: F64 -> F64 */
+      Iop_NegF64, Iop_AbsF64,
  
-      /* Unary ops supported by PPC but not mandated by 754. */
-      Iop_Est8FRecip, /* reciprocal estimate, 8 good bits */
-      Iop_Est5FRSqrt, /* reciprocal square root estimate, 5 good bits */
+      /* Unary operations, with rounding. */
+      /* :: IRRoundingMode(I32) x F64 -> F64 */
+      Iop_SqrtF64, Iop_SqrtF64r32,
  
        /* Comparison, yielding GT/LT/EQ/UN(ordered), as per the following:
              0x45 Unordered
@@ -374,6 +363,7 @@ typedef
        Iop_CmpF64,
  
        /* --- Int to/from FP conversions. --- */
+
        /* For the most part, these take a first argument :: Ity_I32
           (as IRRoundingMode) which is an indication of the rounding
           mode to use, as per the following encoding:
@@ -410,14 +400,52 @@ typedef
        Iop_F32toF64,  /*                       F32 -> F64 */
        Iop_F64toF32,  /* IRRoundingMode(I32) x F64 -> F32 */
  
-      /* F64 -> F64, also takes an I32 first argument encoding the
-         rounding mode. */
-      Iop_RoundF64,
-
        /* Reinterpretation.  Take an F64 and produce an I64 with 
           the same bit pattern, or vice versa. */
        Iop_ReinterpF64asI64, Iop_ReinterpI64asF64,
-      Iop_ReinterpF32asI32, Iop_ReinterpI32asF32,
+                            Iop_ReinterpI32asF32,
+
+      /* --- guest x86/amd64 specifics, not mandated by 754. --- */
+
+      /* Binary ops, with rounding. */
+      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */ 
+      Iop_AtanF64,       /* FPATAN,  arctan(arg1/arg2)       */
+      Iop_Yl2xF64,       /* FYL2X,   arg1 * log2(arg2)       */
+      Iop_Yl2xp1F64,     /* FYL2XP1, arg1 * log2(arg2+1.0)   */
+      Iop_PRemF64,       /* FPREM,   non-IEEE remainder(arg1/arg2)    */
+      Iop_PRemC3210F64,  /* C3210 flags resulting from FPREM, :: I32 */
+      Iop_PRem1F64,      /* FPREM1,  IEEE remainder(arg1/arg2)    */
+      Iop_PRem1C3210F64, /* C3210 flags resulting from FPREM1, :: I32 */
+      Iop_ScaleF64,      /* FSCALE,  arg1 * (2^RoundTowardsZero(arg2)) */
+      /* Note that on x86 guest, PRem1{C3210} has the same behaviour
+         as the IEEE mandated RemF64, except it is limited in the
+         range of its operand.  Hence the partialness. */
+
+      /* Unary ops, with rounding. */
+      /* :: IRRoundingMode(I32) x F64 -> F64 */
+      Iop_SinF64,    /* FSIN */
+      Iop_CosF64,    /* FCOS */
+      Iop_TanF64,    /* FTAN */
+      Iop_2xm1F64,   /* (2^arg - 1.0) */
+      Iop_RoundF64toInt, /* F64 value to nearest integral value (still
+                            as F64) */
+
+      /* --- guest ppc32/64 specifics, not mandated by 754. --- */
+
+      /* :: F64 -> F64 */
+      Iop_Est5FRSqrt,    /* reciprocal square root estimate, 5 good bits */
+
+      /* :: F64 -> F32 */
+      Iop_TruncF64asF32, /* do F64->F32 truncation as per 'fsts' */
+
+      /* :: IRRoundingMode(I32) x F64 -> F64 */
+      Iop_RoundF64toF32, /* round F64 to nearest F32 value (still as F64) */
+      /* NB: pretty much the same as Iop_F64toF32, except no change 
+         of type. */
+
+      /* :: F64 -> I32 */
+      Iop_CalcFPRF, /* Calc 5 fpscr[FPRF] bits (Class, <, =, >, Unord)
+                       from FP result */
  
        /* ------------------ 64-bit SIMD Integer. ------------------ */
  
@@ -629,9 +657,8 @@ typedef
  extern void ppIROp ( IROp );
  
  
-/* Encoding of IEEE754-specified rounding modes in Float -> Int
-   conversions.  This is the same as the encoding used by Intel IA32
-   to indicate x87 rounding mode. */
+/* Encoding of IEEE754-specified rounding modes.  This is the same as
+   the encoding used by Intel IA32 to indicate x87 rounding mode. */
  typedef
     enum { Irrm_NEAREST=0, Irrm_NegINF=1, Irrm_PosINF=2, Irrm_ZERO=3 }
     IRRoundingMode;
@@ -726,6 +753,7 @@ typedef
        Iex_Get,     /* read guest state, fixed offset */
        Iex_GetI,    /* read guest state, run-time offset */
        Iex_Tmp,     /* value of temporary */
+      Iex_Triop,   /* ternary operation */
        Iex_Binop,   /* binary operation */
        Iex_Unop,    /* unary operation */
        Iex_Load,    /* read from memory */ 
@@ -754,6 +782,12 @@ typedef
           struct {
              IRTemp tmp;
           } Tmp;
+         struct {
+            IROp op;
+            struct _IRExpr* arg1;
+            struct _IRExpr* arg2;
+            struct _IRExpr* arg3;
+         } Triop;
           struct {
              IROp op;
              struct _IRExpr* arg1;
@@ -789,6 +823,8 @@ extern IRExpr* IRExpr_Binder ( Int binder );
  extern IRExpr* IRExpr_Get    ( Int off, IRType ty );
  extern IRExpr* IRExpr_GetI   ( IRArray* descr, IRExpr* ix, Int bias );
  extern IRExpr* IRExpr_Tmp    ( IRTemp tmp );
+extern IRExpr* IRExpr_Triop  ( IROp op, IRExpr* arg1, 
+                                        IRExpr* arg2, IRExpr* arg3 );
  extern IRExpr* IRExpr_Binop  ( IROp op, IRExpr* arg1, IRExpr* arg2 );
  extern IRExpr* IRExpr_Unop   ( IROp op, IRExpr* arg );
  extern IRExpr* IRExpr_Load   ( IREndness end, IRType ty, IRExpr* addr );
author	Julian Seward <jseward@acm.org>
	Fri, 3 Feb 2006 16:08:03 +0000 (16:08 +0000)
committer	Julian Seward <jseward@acm.org>
	Fri, 3 Feb 2006 16:08:03 +0000 (16:08 +0000)
VEX/priv/guest-amd64/toIR.c		patch \| blob \| blame \| history
VEX/priv/guest-ppc/toIR.c		patch \| blob \| blame \| history
VEX/priv/guest-x86/toIR.c		patch \| blob \| blame \| history
VEX/priv/host-amd64/isel.c		patch \| blob \| blame \| history
VEX/priv/host-ppc/hdefs.c		patch \| blob \| blame \| history
VEX/priv/host-ppc/hdefs.h		patch \| blob \| blame \| history
VEX/priv/host-ppc/isel.c		patch \| blob \| blame \| history
VEX/priv/host-x86/isel.c		patch \| blob \| blame \| history
VEX/priv/ir/irdefs.c		patch \| blob \| blame \| history
VEX/priv/ir/iropt.c		patch \| blob \| blame \| history
VEX/pub/libvex_ir.h		patch \| blob \| blame \| history