the rounding mode is specified within the instruction itself).
git-svn-id: svn://svn.valgrind.org/vex/trunk@1986
}
- /* 66 0f 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1 (Partial
- implementation only -- only deal with cases where the rounding
- mode is specified directly by the immediate byte. */
- if (have66noF2noF3( pfx )
+ /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
+ (Partial implementation only -- only deal with cases where
+ the rounding mode is specified directly by the immediate byte.)
+ 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
+ (Limitations ditto)
+ */
+ if (have66noF2noF3(pfx)
&& sz == 2
- && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0B) {
-
- modrm = insn[3];
+ && insn[0] == 0x0F && insn[1] == 0x3A
+ && (insn[2] == 0x0B || insn[2] == 0x0A)) {
- IRTemp src = newTemp(Ity_F64);
- IRTemp res = newTemp(Ity_F64);
+ Bool isD = insn[2] == 0x0B;
+ IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
+ IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
Int imm = 0;
+ modrm = insn[3];
+
if (epartIsReg(modrm)) {
- assign( src, getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) );
+ assign( src,
+ isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 )
+ : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
imm = insn[3+1];
if (imm & ~3) goto decode_failure;
delta += 3+1+1;
- DIP( "roundsd $%d,%s,%s\n",
+ DIP( "rounds%c $%d,%s,%s\n",
+ isD ? 'd' : 's',
imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
nameXMMReg( gregOfRexRM(pfx, modrm) ) );
} else {
addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
- assign( src, loadLE( Ity_F64, mkexpr(addr) ));
+ assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
imm = insn[3+alen];
if (imm & ~3) goto decode_failure;
delta += 3+alen+1;
that encoding is the same as the encoding for IRRoundingMode,
we can use that value directly in the IR as a rounding
mode. */
- assign(res, binop(Iop_RoundF64toInt, mkU32(imm & 3), mkexpr(src)) );
+ assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
+ mkU32(imm & 3), mkexpr(src)) );
- putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
+ if (isD)
+ putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
+ else
+ putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
goto decode_success;
}
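The "imm & ~3" checks punt to decode_failure whenever any immediate bit
above bit 1 is set: bit 2 selects MXCSR-based rounding rather than an
immediate-specified mode, and bit 3 controls precision-exception
signalling, neither of which this partial implementation handles.  For
the accepted cases the low two bits use the Intel rounding-control
encoding, which coincides with IRRoundingMode (0 = nearest-even,
1 = toward -inf, 2 = toward +inf, 3 = toward zero).  A minimal C model
of the accepted ROUNDSS behaviour, assuming that encoding
(roundss_model is a hypothetical name, not part of the patch):

   #include <fenv.h>
   #include <math.h>

   /* Sketch only: models ROUNDSS with an immediate-specified rounding
      mode, ignoring exception-flag details. */
   static float roundss_model ( float src, int imm )
   {
      static const int rm[4] = { FE_TONEAREST, FE_DOWNWARD,
                                 FE_UPWARD, FE_TOWARDZERO };
      int saved = fegetround();
      fesetround(rm[imm & 3]);
      float res = rintf(src);    /* round to integral in current mode */
      fesetround(saved);
      return res;
   }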
vassert(nregs >= 1 && nregs <= 7);
return i;
}
-AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush )
+AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB )
{
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
i->tag = Ain_A87PushPop;
i->Ain.A87PushPop.addr = addr;
i->Ain.A87PushPop.isPush = isPush;
+ i->Ain.A87PushPop.szB = szB;
+ vassert(szB == 8 || szB == 4);
return i;
}
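The extra szB parameter threads the memory operand size through to the
printer and the emitter.  As the instruction-selection changes further
down show, existing callers pass 8 to keep the 64-bit behaviour
unchanged, and the new F32 path passes 4, e.g.
AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 4).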
AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
break;
case Ain_A87PushPop:
- vex_printf(i->Ain.A87PushPop.isPush ? "fldl " : "fstpl ");
+ vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ",
+ i->Ain.A87PushPop.szB == 4 ? 's' : 'l');
ppAMD64AMode(i->Ain.A87PushPop.addr);
break;
case Ain_A87FpOp:
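The printer uses AT&T mnemonic suffixes: 's' denotes a 32-bit
single-precision memory operand and 'l' a 64-bit double-precision one,
so szB == 4 prints flds/fstps and szB == 8 prints fldl/fstpl.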
goto done;
case Ain_A87PushPop:
+ vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4);
if (i->Ain.A87PushPop.isPush) {
- /* Load from memory into %st(0): fldl amode */
+ /* Load from memory into %st(0): flds/fldl amode */
*p++ = clearWBit(
rexAMode_M(fake(0), i->Ain.A87PushPop.addr) );
- *p++ = 0xDD;
+ *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
p = doAMode_M(p, fake(0)/*subopcode*/, i->Ain.A87PushPop.addr);
} else {
- /* Dump %st(0) to memory: fstpl amode */
+ /* Dump %st(0) to memory: fstps/fstpl amode */
*p++ = clearWBit(
rexAMode_M(fake(3), i->Ain.A87PushPop.addr) );
- *p++ = 0xDD;
+ *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
p = doAMode_M(p, fake(3)/*subopcode*/, i->Ain.A87PushPop.addr);
goto done;
}
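For reference, the opcode choice matches the Intel SDM encodings:
fld m32fp is D9 /0 and fld m64fp is DD /0, while fstp m32fp is D9 /3
and fstp m64fp is DD /3.  Only the opcode byte depends on szB; the /0
or /3 reg-field subopcode is supplied via the fake(0)/fake(3) argument
to doAMode_M.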
Int nregs; /* 1 <= nregs <= 7 */
} A87Free;
- /* Push a 64-bit FP value from memory onto the stack, or move
- a value from the stack to memory and remove it from the
- stack. */
+ /* Push a 32- or 64-bit FP value from memory onto the stack,
+ or move a value from the stack to memory and remove it
+ from the stack. */
struct {
AMD64AMode* addr;
Bool isPush;
+ UChar szB; /* 4 or 8 */
} A87PushPop;
/* Do an operation on the top-of-stack. This can be unary, in
extern AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz );
extern AMD64Instr* AMD64Instr_A87Free ( Int nregs );
-extern AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush );
+extern AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB );
extern AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op );
extern AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr );
extern AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr );
/* one arg -> top of x87 stack */
addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp));
- addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
/* other arg -> top of x87 stack */
addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp));
- addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
switch (e->Iex.Triop.op) {
case Iop_PRemC3210F64:
return dst;
}
+ if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
+ AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+ HReg arg = iselFltExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+
+   /* arg now holds the value to be rounded.  The first thing to do
+      is set the FPU's rounding mode accordingly. */
+
+ /* Set host x87 rounding mode */
+ set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, arg, m8_rsp));
+ addInstr(env, AMD64Instr_A87Free(1));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 4));
+ addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 4));
+ addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, dst, m8_rsp));
+
+ /* Restore default x87 rounding. */
+ set_FPU_rounding_default( env );
+
+ return dst;
+ }
+
ppIRExpr(e);
vpanic("iselFltExpr_wrk");
}
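Modulo register allocation, the Iop_RoundF32toInt case above emits a
sequence along these lines (a sketch; xmm_arg/xmm_dst stand for
whatever registers the allocator picks, and the fldcw-based mode
switching is summarised to one line each side):

   /* set_FPU_rounding_mode:      fldcw  <requested mode>     */
   /* SseLdSt store, 4 bytes:     movss  %xmm_arg, -8(%rsp)   */
   /* A87Free(1):                 ffree  %st(7)               */
   /* A87PushPop push, szB 4:     flds   -8(%rsp)             */
   /* A87FpOp(Afp_ROUND):         frndint                     */
   /* A87PushPop pop, szB 4:      fstps  -8(%rsp)             */
   /* SseLdSt load, 4 bytes:      movss  -8(%rsp), %xmm_dst   */
   /* set_FPU_rounding_default:   fldcw  <round-nearest>      */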
addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
addInstr(env, AMD64Instr_A87Free(1));
- addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
- addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
/* Restore default x87 rounding. */
/* one arg -> top of x87 stack */
addInstr(env, AMD64Instr_SseLdSt(
False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp));
- addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
/* other arg -> top of x87 stack */
addInstr(env, AMD64Instr_SseLdSt(
False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp));
- addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
/* do it */
/* XXXROUNDINGFIXME */
}
/* save result */
- addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
return dst;
}
Int nNeeded = e->Iex.Binop.op==Iop_TanF64 ? 2 : 1;
addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
addInstr(env, AMD64Instr_A87Free(nNeeded));
- addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
/* XXXROUNDINGFIXME */
/* set roundingmode here */
addInstr(env, AMD64Instr_A87FpOp(fpop));
if (e->Iex.Binop.op==Iop_TanF64) {
/* get rid of the extra 1.0 that fptan pushes */
- addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
}
- addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
return dst;
}
case Iop_F64toF32: vex_printf("F64toF32"); return;
case Iop_RoundF64toInt: vex_printf("RoundF64toInt"); return;
+ case Iop_RoundF32toInt: vex_printf("RoundF32toInt"); return;
case Iop_RoundF64toF32: vex_printf("RoundF64toF32"); return;
case Iop_ReinterpF64asI64: vex_printf("ReinterpF64asI64"); return;
BINARY(ity_RMode,Ity_F64, Ity_F64);
case Iop_SqrtF32:
+ case Iop_RoundF32toInt:
BINARY(ity_RMode,Ity_F32, Ity_F32);
case Iop_CmpF64:
Iop_2xm1F64, /* (2^arg - 1.0) */
Iop_RoundF64toInt, /* F64 value to nearest integral value (still
as F64) */
+ Iop_RoundF32toInt, /* F32 value to nearest integral value (still
+ as F32) */
/* --- guest ppc32/64 specifics, not mandated by 754. --- */
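The typing added in typeOfPrimop above makes the first operand an
Ity_I32 carrying an IRRoundingMode and the second the Ity_F32 value,
mirroring Iop_RoundF64toInt.  A front end constructs it the same way
the amd64 decoder does, e.g. (sketch using the toIR helpers, with x a
hypothetical IRTemp of type Ity_F32):

   /* Round x toward zero; the result is still Ity_F32. */
   IRExpr* r = binop( Iop_RoundF32toInt,
                      mkU32(Irrm_ZERO),   /* rounding mode */
                      mkexpr(x) );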