From: Julian Seward <jseward@acm.org>
Date: Sun, 27 Jun 2010 09:06:34 +0000 (+0000)
Subject: Implement ROUNDSS (partial implementation, in the case where
X-Git-Tag: svn/VALGRIND_3_6_1^2~84
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7af731cb2ffc86a0055d8fb94cf43c8ff3894808;p=thirdparty%2Fvalgrind.git

Implement ROUNDSS (partial implementation, in the case where
the rounding mode is specified within the instruction itself).


git-svn-id: svn://svn.valgrind.org/vex/trunk@1986
---

diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c
index 4868478eb6..8fc2ff4029 100644
--- a/VEX/priv/guest_amd64_toIR.c
+++ b/VEX/priv/guest_amd64_toIR.c
@@ -15005,30 +15005,38 @@ DisResult disInstr_AMD64_WRK (
    }
 
 
-   /* 66 0f 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1 (Partial
-      implementation only -- only deal with cases where the rounding
-      mode is specified directly by the immediate byte. */
-   if (have66noF2noF3( pfx ) 
+   /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
+      (Partial implementation only -- only deal with cases where
+      the rounding mode is specified directly by the immediate byte.)
+      66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
+      (Limitations ditto)
+   */
+   if (have66noF2noF3(pfx) 
        && sz == 2 
-       && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0B) {
-
-      modrm = insn[3];
+       && insn[0] == 0x0F && insn[1] == 0x3A
+       && (insn[2] == 0x0B || insn[2] == 0x0A)) {
 
-      IRTemp src = newTemp(Ity_F64);
-      IRTemp res = newTemp(Ity_F64);
+      Bool   isD = insn[2] == 0x0B;
+      IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
+      IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
       Int    imm = 0;
 
+      modrm = insn[3];
+
       if (epartIsReg(modrm)) {
-         assign( src, getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) );
+         assign( src, 
+                 isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 )
+                     : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
          imm = insn[3+1];
          if (imm & ~3) goto decode_failure;
          delta += 3+1+1;
-         DIP( "roundsd $%d,%s,%s\n",
+         DIP( "rounds%c $%d,%s,%s\n",
+              isD ? 'd' : 's',
               imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
                    nameXMMReg( gregOfRexRM(pfx, modrm) ) );
       } else {
          addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
-         assign( src, loadLE( Ity_F64, mkexpr(addr) ));
+         assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
          imm = insn[3+alen];
          if (imm & ~3) goto decode_failure;
          delta += 3+alen+1;
@@ -15040,9 +15048,13 @@ DisResult disInstr_AMD64_WRK (
          that encoding is the same as the encoding for IRRoundingMode,
          we can use that value directly in the IR as a rounding
          mode. */
-      assign(res, binop(Iop_RoundF64toInt, mkU32(imm & 3), mkexpr(src)) );
+      assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
+                  mkU32(imm & 3), mkexpr(src)) );
 
-      putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
+      if (isD)
+         putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
+      else
+         putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
 
       goto decode_success;
    }
diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c
index eccf940853..1c3d012f7d 100644
--- a/VEX/priv/host_amd64_defs.c
+++ b/VEX/priv/host_amd64_defs.c
@@ -809,12 +809,14 @@ AMD64Instr* AMD64Instr_A87Free ( Int nregs )
    vassert(nregs >= 1 && nregs <= 7);
    return i;
 }
-AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush )
+AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB )
 {
    AMD64Instr* i            = LibVEX_Alloc(sizeof(AMD64Instr));
    i->tag                   = Ain_A87PushPop;
    i->Ain.A87PushPop.addr   = addr;
    i->Ain.A87PushPop.isPush = isPush;
+   i->Ain.A87PushPop.szB    = szB;
+   vassert(szB == 8 || szB == 4);
    return i;
 }
 AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
@@ -1195,7 +1197,8 @@ void ppAMD64Instr ( AMD64Instr* i, Bool mode64 )
          vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
          break;
       case Ain_A87PushPop:
-         vex_printf(i->Ain.A87PushPop.isPush ? "fldl " : "fstpl ");
+         vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ",
+                    i->Ain.A87PushPop.szB == 4 ? 's' : 'l');
          ppAMD64AMode(i->Ain.A87PushPop.addr);
          break;
       case Ain_A87FpOp:
@@ -2948,17 +2951,18 @@ Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i,
       goto done;
 
    case Ain_A87PushPop:
+      vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4);
       if (i->Ain.A87PushPop.isPush) {
-         /* Load from memory into %st(0): fldl amode */
+         /* Load from memory into %st(0): flds/fldl amode */
          *p++ = clearWBit(
                    rexAMode_M(fake(0), i->Ain.A87PushPop.addr) );
-         *p++ = 0xDD;
+         *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
 	 p = doAMode_M(p, fake(0)/*subopcode*/, i->Ain.A87PushPop.addr);
       } else {
-         /* Dump %st(0) to memory: fstpl amode */
+         /* Dump %st(0) to memory: fstps/fstpl amode */
          *p++ = clearWBit(
                    rexAMode_M(fake(3), i->Ain.A87PushPop.addr) );
-         *p++ = 0xDD;
+         *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
          p = doAMode_M(p, fake(3)/*subopcode*/, i->Ain.A87PushPop.addr);
          goto done;
       }
diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h
index 21965cd31a..cf49ae148c 100644
--- a/VEX/priv/host_amd64_defs.h
+++ b/VEX/priv/host_amd64_defs.h
@@ -546,12 +546,13 @@ typedef
             Int nregs; /* 1 <= nregs <= 7 */
          } A87Free;
 
-         /* Push a 64-bit FP value from memory onto the stack, or move
-            a value from the stack to memory and remove it from the
-            stack. */
+         /* Push a 32- or 64-bit FP value from memory onto the stack,
+            or move a value from the stack to memory and remove it
+            from the stack. */
          struct {
             AMD64AMode* addr;
             Bool        isPush;
+            UChar       szB; /* 4 or 8 */
          } A87PushPop;
 
          /* Do an operation on the top-of-stack.  This can be unary, in
@@ -694,7 +695,7 @@ extern AMD64Instr* AMD64Instr_ACAS       ( AMD64AMode* addr, UChar sz );
 extern AMD64Instr* AMD64Instr_DACAS      ( AMD64AMode* addr, UChar sz );
 
 extern AMD64Instr* AMD64Instr_A87Free    ( Int nregs );
-extern AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush );
+extern AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB );
 extern AMD64Instr* AMD64Instr_A87FpOp    ( A87FpOp op );
 extern AMD64Instr* AMD64Instr_A87LdCW    ( AMD64AMode* addr );
 extern AMD64Instr* AMD64Instr_A87StSW    ( AMD64AMode* addr );
diff --git a/VEX/priv/host_amd64_isel.c b/VEX/priv/host_amd64_isel.c
index ddf7503e96..ff38e17bb1 100644
--- a/VEX/priv/host_amd64_isel.c
+++ b/VEX/priv/host_amd64_isel.c
@@ -1776,11 +1776,11 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
 
          /* one arg -> top of x87 stack */
          addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp));
-         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
 
          /* other arg -> top of x87 stack */
          addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp));
-         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
 
          switch (e->Iex.Triop.op) {
             case Iop_PRemC3210F64:
@@ -2794,6 +2794,30 @@ static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
        return dst;
    }
 
+   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
+      AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+      HReg        arg    = iselFltExpr(env, e->Iex.Binop.arg2);
+      HReg        dst    = newVRegV(env);
+
+      /* rf now holds the value to be rounded.  The first thing to do
+         is set the FPU's rounding mode accordingly. */
+
+      /* Set host x87 rounding mode */
+      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
+      addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, arg, m8_rsp));
+      addInstr(env, AMD64Instr_A87Free(1));
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 4));
+      addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 4));
+      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, dst, m8_rsp));
+
+      /* Restore default x87 rounding. */
+      set_FPU_rounding_default( env );
+
+      return dst;
+   }
+
    ppIRExpr(e);
    vpanic("iselFltExpr_wrk");
 }
@@ -2937,9 +2961,9 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
 
       addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
       addInstr(env, AMD64Instr_A87Free(1));
-      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
       addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
-      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
       addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
 
       /* Restore default x87 rounding. */
@@ -2968,12 +2992,12 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
       /* one arg -> top of x87 stack */
       addInstr(env, AMD64Instr_SseLdSt(
                        False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp));
-      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
 
       /* other arg -> top of x87 stack */
       addInstr(env, AMD64Instr_SseLdSt(
                        False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp));
-      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
 
       /* do it */
       /* XXXROUNDINGFIXME */
@@ -3002,7 +3026,7 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
       }
 
       /* save result */
-      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
       addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
       return dst;
    }
@@ -3067,15 +3091,15 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
          Int     nNeeded    = e->Iex.Binop.op==Iop_TanF64 ? 2 : 1;
          addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
          addInstr(env, AMD64Instr_A87Free(nNeeded));
-         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
          /* XXXROUNDINGFIXME */
          /* set roundingmode here */
          addInstr(env, AMD64Instr_A87FpOp(fpop));
          if (e->Iex.Binop.op==Iop_TanF64) {
             /* get rid of the extra 1.0 that fptan pushes */
-            addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+            addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
          }
-         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
          addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
          return dst;
       }
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
index 0e6f2edadf..4d35401d5e 100644
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -302,6 +302,7 @@ void ppIROp ( IROp op )
       case Iop_F64toF32: vex_printf("F64toF32"); return;
 
       case Iop_RoundF64toInt: vex_printf("RoundF64toInt"); return;
+      case Iop_RoundF32toInt: vex_printf("RoundF32toInt"); return;
       case Iop_RoundF64toF32: vex_printf("RoundF64toF32"); return;
 
       case Iop_ReinterpF64asI64: vex_printf("ReinterpF64asI64"); return;
@@ -1796,6 +1797,7 @@ void typeOfPrimop ( IROp op,
          BINARY(ity_RMode,Ity_F64, Ity_F64);
 
       case Iop_SqrtF32:
+      case Iop_RoundF32toInt:
          BINARY(ity_RMode,Ity_F32, Ity_F32);
 
       case Iop_CmpF64:
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
index 4b7d628fb4..0e291ebb3c 100644
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -639,6 +639,8 @@ typedef
       Iop_2xm1F64,   /* (2^arg - 1.0) */
       Iop_RoundF64toInt, /* F64 value to nearest integral value (still
                             as F64) */
+      Iop_RoundF32toInt, /* F32 value to nearest integral value (still
+                            as F32) */
 
       /* --- guest ppc32/64 specifics, not mandated by 754. --- */