From: Dejan Jevtic <dejan.jevtic@valgrind.org>
Date: Thu, 14 Nov 2013 15:44:42 +0000 (+0000)
Subject: mips32/64: Fixed the problem with fpu instructions.
X-Git-Tag: svn/VALGRIND_3_10_1^2~171
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ca85aff382e0aa72b078d29a6c5a8a6f9bafa7b3;p=thirdparty%2Fvalgrind.git

mips32/64: Fixed the problem with fpu instructions.

Include the value of the FCSR register when emitting
some FPU instructions.
Calculate a new value of the FCSR register after
some FPU instructions.


git-svn-id: svn://svn.valgrind.org/vex/trunk@2799
---

diff --git a/VEX/priv/guest_mips_defs.h b/VEX/priv/guest_mips_defs.h
index f30abc96c7..dc7163c3af 100644
--- a/VEX/priv/guest_mips_defs.h
+++ b/VEX/priv/guest_mips_defs.h
@@ -85,7 +85,8 @@ typedef enum {
    TRUNCWS,  TRUNCWD, TRUNCLS, TRUNCLD,
    CVTDS,    CVTDW,   CVTSD,   CVTSW,
    CVTWS,    CVTWD,   CVTDL,   CVTLS,
-   CVTLD,    CVTSL
+   CVTLD,    CVTSL,   ADDS,    ADDD,
+   SUBS,     SUBD,    DIVS
 } flt_op;
 
 extern UInt mips32_dirtyhelper_mfc0 ( UInt rd, UInt sel );
@@ -99,7 +100,7 @@ extern ULong mips64_dirtyhelper_rdhwr ( ULong rt, ULong rd );
 #endif
 
 extern UInt mips_dirtyhelper_calculate_FCSR ( void* guest_state, UInt fs,
-                                              flt_op op );
+                                              UInt ft, flt_op op );
 
 /*---------------------------------------------------------*/
 /*---               Condition code stuff                ---*/
diff --git a/VEX/priv/guest_mips_helpers.c b/VEX/priv/guest_mips_helpers.c
index 6a8a563a28..b8b25daab7 100644
--- a/VEX/priv/guest_mips_helpers.c
+++ b/VEX/priv/guest_mips_helpers.c
@@ -1107,145 +1107,211 @@ ULong mips64_dirtyhelper_rdhwr ( ULong rt, ULong rd )
 }
 #endif
 
-#define ASM_VOLATILE_ROUND32(fs, inst)                              \
-   __asm__ volatile("cfc1    $t0, $31"  "\n\t"                      \
-                    "ctc1    %2,  $31"  "\n\t"                      \
-                    "mtc1    %1,  $f0"  "\n\t"                      \
-                    ""#inst" $f0, $f0"  "\n\t"                      \
-                    "cfc1    %0,  $31"  "\n\t"                      \
-                    "ctc1    $t0, $31"  "\n\t"                      \
+#define ASM_VOLATILE_UNARY32(inst)                                  \
+   __asm__ volatile("cfc1  $t0,  $31"   "\n\t"                      \
+                    "ctc1  %2,   $31"   "\n\t"                      \
+                    "mtc1  %1,   $f20"  "\n\t"                      \
+                    #inst" $f20, $f20"  "\n\t"                      \
+                    "cfc1  %0,   $31"   "\n\t"                      \
+                    "ctc1  $t0,  $31"   "\n\t"                      \
                     : "=r" (ret)                                    \
-                    : "r" (loVal), "r" (fcsr)                       \
-                    : "t0", "$f0", "$f1"                            \
+                    : "r" (loFsVal), "r" (fcsr)                     \
+                    : "t0", "$f20"                                  \
                    );
 
-#define ASM_VOLATILE_ROUND32_DOUBLE(fs, inst)                       \
-   __asm__ volatile("cfc1    $t0, $31"  "\n\t"                      \
-                    "ctc1    %3,  $31"  "\n\t"                      \
-                    "mtc1    %1,  $f0"  "\n\t"                      \
-                    "mtc1    %2,  $f1"  "\n\t"                      \
-                    ""#inst" $f0, $f0"  "\n\t"                      \
-                    "cfc1    %0,  $31"  "\n\t"                      \
-                    "ctc1    $t0, $31"  "\n\t"                      \
+#define ASM_VOLATILE_UNARY32_DOUBLE(inst)                           \
+   __asm__ volatile("cfc1  $t0,  $31"   "\n\t"                      \
+                    "ctc1  %3,   $31"   "\n\t"                      \
+                    "mtc1  %1,   $f20"  "\n\t"                      \
+                    "mtc1  %2,   $f21"  "\n\t"                      \
+                    #inst" $f20, $f20"  "\n\t"                      \
+                    "cfc1  %0,   $31"   "\n\t"                      \
+                    "ctc1  $t0,  $31"   "\n\t"                      \
                     : "=r" (ret)                                    \
-                    : "r" (loVal), "r" (hiVal), "r" (fcsr)          \
-                    : "t0", "$f0", "$f1"                            \
+                    : "r" (loFsVal), "r" (hiFsVal), "r" (fcsr)      \
+                    : "t0", "$f20", "$f21"                          \
                    );
 
-#define ASM_VOLATILE_ROUND64(fs, inst)                              \
-   __asm__ volatile("cfc1     $t0, $31"  "\n\t"                     \
-                    "ctc1     %2,  $31"  "\n\t"                     \
-                    "dmtc1    %1,  $f0"  "\n\t"                     \
-                    ""#inst"  $f0, $f0"  "\n\t"                     \
-                    "cfc1     %0,  $31"  "\n\t"                     \
-                    "ctc1     $t0, $31"  "\n\t"                     \
+#define ASM_VOLATILE_UNARY64(inst)                                  \
+   __asm__ volatile("cfc1   $t0,  $31"   "\n\t"                     \
+                    "ctc1   %2,   $31"   "\n\t"                     \
+                    "dmtc1  %1,   $f24"  "\n\t"                     \
+                    #inst"  $f24, $f24"  "\n\t"                     \
+                    "cfc1   %0,   $31"   "\n\t"                     \
+                    "ctc1   $t0,  $31"   "\n\t"                     \
                     : "=r" (ret)                                    \
-                    : "r" (addr[fs]), "r" (fcsr)                    \
-                    : "t0", "$f0"                                   \
+                    : "r" (fsVal), "r" (fcsr)                       \
+                    : "t0", "$f24"                                  \
+                   );
+
+#define ASM_VOLATILE_BINARY32(inst)                                 \
+   __asm__ volatile("cfc1  $t0,  $31"         "\n\t"                \
+                    "ctc1  %3,   $31"         "\n\t"                \
+                    "mtc1  %1,   $f20"        "\n\t"                \
+                    "mtc1  %2,   $f22"        "\n\t"                \
+                    #inst" $f20, $f20, $f22"  "\n\t"                \
+                    "cfc1  %0,   $31"         "\n\t"                \
+                    "ctc1  $t0,  $31"         "\n\t"                \
+                    : "=r" (ret)                                    \
+                    : "r" (loFsVal), "r" (loFtVal), "r" (fcsr)      \
+                    : "t0", "$f20", "$f22"                          \
+                   );
+
+#define ASM_VOLATILE_BINARY32_DOUBLE(inst)                          \
+   __asm__ volatile("cfc1  $t0,  $31"         "\n\t"                \
+                    "ctc1  %5,   $31"         "\n\t"                \
+                    "mtc1  %1,   $f20"        "\n\t"                \
+                    "mtc1  %2,   $f21"        "\n\t"                \
+                    "mtc1  %3,   $f22"        "\n\t"                \
+                    "mtc1  %4,   $f23"        "\n\t"                \
+                    #inst" $f20, $f20, $f22"  "\n\t"                \
+                    "cfc1  %0,   $31"         "\n\t"                \
+                    "ctc1  $t0,  $31"         "\n\t"                \
+                    : "=r" (ret)                                    \
+                    : "r" (loFsVal), "r" (hiFsVal), "r" (loFtVal),  \
+                      "r" (hiFtVal), "r" (fcsr)                     \
+                    : "t0", "$f20", "$f21", "$f22", "$f23"          \
+                   );
+
+#define ASM_VOLATILE_BINARY64(inst)                                 \
+   __asm__ volatile("cfc1  $t0,  $31"         "\n\t"                \
+                    "ctc1  %3,   $31"         "\n\t"                \
+                    "dmtc1 %1,   $f24"        "\n\t"                \
+                    "dmtc1 %2,   $f25"        "\n\t"                \
+                    #inst" $f24, $f24, $f25"  "\n\t"                \
+                    "cfc1  %0,   $31"         "\n\t"                \
+                    "ctc1  $t0,  $31"         "\n\t"                \
+                    : "=r" (ret)                                    \
+                    : "r" (fsVal), "r" (ftVal), "r" (fcsr)          \
+                    : "t0", "$f24", "$f25"                          \
                    );
 
 /* TODO: Add cases for all fpu instructions because all fpu instructions are
          change the value of FCSR register. */
-extern UInt mips_dirtyhelper_calculate_FCSR ( void* gs, UInt fs, flt_op inst )
+extern UInt mips_dirtyhelper_calculate_FCSR ( void* gs, UInt fs, UInt ft,
+                                              flt_op inst )
 {
    UInt ret = 0;
 #if defined(__mips__)
 #if defined(VGA_mips32)
    VexGuestMIPS32State* guest_state = (VexGuestMIPS32State*)gs;
-   UInt *addr = (UInt *)&guest_state->guest_f0;
-   UInt loVal = addr[fs];
-   UInt hiVal = addr[fs+1];
-#define ASM_VOLATILE_ROUND(fs, inst)        ASM_VOLATILE_ROUND32(fs, inst)
-#define ASM_VOLATILE_ROUND_DOUBLE(fs, inst) ASM_VOLATILE_ROUND32_DOUBLE(fs, inst)
+   UInt *addr    = (UInt *)&guest_state->guest_f0;
+   UInt loFsVal  = addr[fs];
+   UInt hiFsVal  = addr[fs+1];
+   UInt loFtVal  = addr[ft];
+   UInt hiFtVal  = addr[ft+1];
+#define ASM_VOLATILE_UNARY(inst)         ASM_VOLATILE_UNARY32(inst)
+#define ASM_VOLATILE_UNARY_DOUBLE(inst)  ASM_VOLATILE_UNARY32_DOUBLE(inst)
+#define ASM_VOLATILE_BINARY(inst)        ASM_VOLATILE_BINARY32(inst)
+#define ASM_VOLATILE_BINARY_DOUBLE(inst) ASM_VOLATILE_BINARY32_DOUBLE(inst)
 #else
    VexGuestMIPS64State* guest_state = (VexGuestMIPS64State*)gs;
    ULong *addr = (ULong *)&guest_state->guest_f0;
-#define ASM_VOLATILE_ROUND(fs, inst)        ASM_VOLATILE_ROUND64(fs, inst)
-#define ASM_VOLATILE_ROUND_DOUBLE(fs, inst) ASM_VOLATILE_ROUND64(fs, inst)
+   ULong fsVal = addr[fs];
+   ULong ftVal = addr[ft];
+#define ASM_VOLATILE_UNARY(inst)         ASM_VOLATILE_UNARY64(inst)
+#define ASM_VOLATILE_UNARY_DOUBLE(inst)  ASM_VOLATILE_UNARY64(inst)
+#define ASM_VOLATILE_BINARY(inst)        ASM_VOLATILE_BINARY64(inst)
+#define ASM_VOLATILE_BINARY_DOUBLE(inst) ASM_VOLATILE_BINARY64(inst)
 #endif
    UInt fcsr = guest_state->guest_FCSR;
    switch (inst) {
       case ROUNDWD:
-         ASM_VOLATILE_ROUND_DOUBLE(fs, round.w.d)
+         ASM_VOLATILE_UNARY_DOUBLE(round.w.d)
          break;
       case FLOORWS:
-         ASM_VOLATILE_ROUND(fs, floor.w.s)
+         ASM_VOLATILE_UNARY(floor.w.s)
          break;
       case FLOORWD:
-         ASM_VOLATILE_ROUND_DOUBLE(fs, floor.w.d)
+         ASM_VOLATILE_UNARY_DOUBLE(floor.w.d)
          break;
       case TRUNCWS:
-         ASM_VOLATILE_ROUND(fs, trunc.w.s)
+         ASM_VOLATILE_UNARY(trunc.w.s)
          break;
       case TRUNCWD:
-         ASM_VOLATILE_ROUND_DOUBLE(fs, trunc.w.d)
+         ASM_VOLATILE_UNARY_DOUBLE(trunc.w.d)
          break;
       case CEILWS:
-         ASM_VOLATILE_ROUND(fs, ceil.w.s)
+         ASM_VOLATILE_UNARY(ceil.w.s)
          break;
       case CEILWD:
-         ASM_VOLATILE_ROUND_DOUBLE(fs, ceil.w.d)
+         ASM_VOLATILE_UNARY_DOUBLE(ceil.w.d)
          break;
       case CVTDS:
-         ASM_VOLATILE_ROUND(fs, cvt.d.s)
+         ASM_VOLATILE_UNARY(cvt.d.s)
          break;
       case CVTDW:
-         ASM_VOLATILE_ROUND(fs, cvt.d.w)
+         ASM_VOLATILE_UNARY(cvt.d.w)
          break;
       case CVTSW:
-         ASM_VOLATILE_ROUND(fs, cvt.s.w)
+         ASM_VOLATILE_UNARY(cvt.s.w)
          break;
       case CVTSD:
-         ASM_VOLATILE_ROUND_DOUBLE(fs, cvt.s.d)
+         ASM_VOLATILE_UNARY_DOUBLE(cvt.s.d)
          break;
       case CVTWS:
-         ASM_VOLATILE_ROUND(fs, cvt.w.s)
+         ASM_VOLATILE_UNARY(cvt.w.s)
          break;
       case CVTWD:
-         ASM_VOLATILE_ROUND_DOUBLE(fs, cvt.w.d)
+         ASM_VOLATILE_UNARY_DOUBLE(cvt.w.d)
          break;
       case ROUNDWS:
-         ASM_VOLATILE_ROUND(fs, round.w.s)
+         ASM_VOLATILE_UNARY(round.w.s)
          break;
 #if ((__mips == 32) && defined(__mips_isa_rev) && (__mips_isa_rev >= 2)) \
     || (__mips == 64)
       case CEILLS:
-         ASM_VOLATILE_ROUND(fs, ceil.l.s)
+         ASM_VOLATILE_UNARY(ceil.l.s)
          break;
       case CEILLD:
-         ASM_VOLATILE_ROUND_DOUBLE(fs, ceil.l.d)
+         ASM_VOLATILE_UNARY_DOUBLE(ceil.l.d)
          break;
       case CVTDL:
-         ASM_VOLATILE_ROUND_DOUBLE(fs, cvt.d.l)
+         ASM_VOLATILE_UNARY_DOUBLE(cvt.d.l)
          break;
       case CVTLS:
-         ASM_VOLATILE_ROUND(fs, cvt.l.s)
+         ASM_VOLATILE_UNARY(cvt.l.s)
          break;
       case CVTLD:
-         ASM_VOLATILE_ROUND_DOUBLE(fs, cvt.l.d)
+         ASM_VOLATILE_UNARY_DOUBLE(cvt.l.d)
          break;
       case CVTSL:
-         ASM_VOLATILE_ROUND_DOUBLE(fs, cvt.s.l)
+         ASM_VOLATILE_UNARY_DOUBLE(cvt.s.l)
          break;
       case FLOORLS:
-         ASM_VOLATILE_ROUND(fs, floor.l.s)
+         ASM_VOLATILE_UNARY(floor.l.s)
          break;
       case FLOORLD:
-         ASM_VOLATILE_ROUND_DOUBLE(fs, floor.l.d)
+         ASM_VOLATILE_UNARY_DOUBLE(floor.l.d)
          break;
       case ROUNDLS:
-         ASM_VOLATILE_ROUND(fs, round.l.s)
+         ASM_VOLATILE_UNARY(round.l.s)
          break;
       case ROUNDLD:
-         ASM_VOLATILE_ROUND_DOUBLE(fs, round.l.d)
+         ASM_VOLATILE_UNARY_DOUBLE(round.l.d)
          break;
       case TRUNCLS:
-         ASM_VOLATILE_ROUND(fs, trunc.l.s)
+         ASM_VOLATILE_UNARY(trunc.l.s)
          break;
       case TRUNCLD:
-         ASM_VOLATILE_ROUND_DOUBLE(fs, trunc.l.d)
+         ASM_VOLATILE_UNARY_DOUBLE(trunc.l.d)
          break;
 #endif
+      case ADDS:
+          ASM_VOLATILE_BINARY(add.s)
+          break;
+      case ADDD:
+          ASM_VOLATILE_BINARY_DOUBLE(add.d)
+          break;
+      case SUBS:
+          ASM_VOLATILE_BINARY(sub.s)
+          break;
+      case SUBD:
+          ASM_VOLATILE_BINARY_DOUBLE(sub.d)
+          break;
+      case DIVS:
+          ASM_VOLATILE_BINARY(div.s)
+          break;
       default:
          vassert(0);
          break;
diff --git a/VEX/priv/guest_mips_toIR.c b/VEX/priv/guest_mips_toIR.c
index d22adcb09f..0d3a6a2d6d 100644
--- a/VEX/priv/guest_mips_toIR.c
+++ b/VEX/priv/guest_mips_toIR.c
@@ -1090,8 +1090,11 @@ static void putFCSR(IRExpr * e)
 
 /* fs   - fpu source register number.
    inst - fpu instruction that needs to be executed.
-   sz32 - size of source register. */
-static void calculateFCSR(UInt fs, UInt inst, Bool sz32)
+   sz32 - size of source register.
+   opN  - number of operands:
+          1 - unary operation.
+          2 - binary operation. */
+static void calculateFCSR(UInt fs, UInt ft, UInt inst, Bool sz32, UInt opN)
 {
    IRDirty *d;
    IRTemp fcsr = newTemp(Ity_I32);
@@ -1100,28 +1103,64 @@ static void calculateFCSR(UInt fs, UInt inst, Bool sz32)
    d = unsafeIRDirty_1_N(fcsr, 0,
                          "mips_dirtyhelper_calculate_FCSR",
                          &mips_dirtyhelper_calculate_FCSR,
-                         mkIRExprVec_3(IRExpr_BBPTR(),
+                         mkIRExprVec_4(IRExpr_BBPTR(),
                                        mkU32(fs),
+                                       mkU32(ft),
                                        mkU32(inst)));
 
-   /* Declare we're reading guest state. */
-   if (!mode64 && !sz32)
-      d->nFxState = 2;
-   else
-      d->nFxState = 1;
-   vex_bzero(&d->fxState, sizeof(d->fxState));
+   if (opN == 1) {  /* Unary operation. */
+      /* Declare we're reading guest state. */
+      if (!mode64 && !sz32)
+         d->nFxState = 3;
+      else
+         d->nFxState = 2;
+      vex_bzero(&d->fxState, sizeof(d->fxState));
 
-   d->fxState[0].fx     = Ifx_Read;  /* read */
-   d->fxState[0].offset = floatGuestRegOffset(fs);
-   if (mode64)
-      d->fxState[0].size   = sizeof(ULong);
-   else
+      d->fxState[0].fx     = Ifx_Read;  /* read */
+      d->fxState[0].offset = offsetof(VexGuestMIPS64State, guest_FCSR);
       d->fxState[0].size   = sizeof(UInt);
+      d->fxState[1].fx     = Ifx_Read;  /* read */
+      d->fxState[1].offset = floatGuestRegOffset(fs);
+      if (mode64)
+         d->fxState[1].size   = sizeof(ULong);
+      else
+         d->fxState[1].size   = sizeof(UInt);
+
+      if (!mode64 && !sz32) {
+         d->fxState[2].fx     = Ifx_Read;  /* read */
+         d->fxState[2].offset = floatGuestRegOffset(fs+1);
+         d->fxState[2].size   = sizeof(UInt);
+      }
+   } else if (opN == 2) {  /* Binary operation. */
+      /* Declare we're reading guest state. */
+      if (!mode64 && !sz32)
+         d->nFxState = 5;
+      else
+         d->nFxState = 3;
+      vex_bzero(&d->fxState, sizeof(d->fxState));
 
-   if (!mode64 && !sz32) {
+      d->fxState[0].fx     = Ifx_Read;  /* read */
+      d->fxState[0].offset = offsetof(VexGuestMIPS64State, guest_FCSR);
+      d->fxState[0].size   = sizeof(UInt);
       d->fxState[1].fx     = Ifx_Read;  /* read */
-      d->fxState[1].offset = floatGuestRegOffset(fs+1);
-      d->fxState[1].size   = sizeof(UInt);
+      d->fxState[1].offset = floatGuestRegOffset(fs);
+      d->fxState[2].fx     = Ifx_Read;  /* read */
+      d->fxState[2].offset = floatGuestRegOffset(ft);
+      if (mode64) {
+         d->fxState[1].size   = sizeof(ULong);
+         d->fxState[2].size   = sizeof(ULong);
+      } else {
+         d->fxState[1].size   = sizeof(UInt);
+         d->fxState[2].size   = sizeof(UInt);
+      }
+      if (!mode64 && !sz32) {
+         d->fxState[3].fx     = Ifx_Read;  /* read */
+         d->fxState[3].offset = floatGuestRegOffset(fs+1);
+         d->fxState[3].size   = sizeof(UInt);
+         d->fxState[4].fx     = Ifx_Read;  /* read */
+         d->fxState[4].offset = floatGuestRegOffset(ft+1);
+         d->fxState[4].size   = sizeof(UInt);
+      }
    }
 
    stmt(IRStmt_Dirty(d));
@@ -11859,6 +11898,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
                case 0x10:  /* S */
                   {
                      DIP("div.s f%d, f%d, f%d", fd, fs, ft);
+                     calculateFCSR(fs, ft, DIVS, False, 2);
                      IRExpr *rm = get_IR_roundingmode();
                      putFReg(fd, mkWidenFromF32(tyF, triop(Iop_DivF32, rm,
                                  getLoFromF64(tyF, getFReg(fs)),
@@ -11872,25 +11912,25 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
 
             case 0x01:  /* SUB.fmt */
                switch (fmt) {
-               case 0x11:  /* D */
-                  {
+                  case 0x11: {  /* D */
                      DIP("sub.d f%d, f%d, f%d", fd, fs, ft);
+                     calculateFCSR(fs, ft, SUBD, False, 2);
                      IRExpr *rm = get_IR_roundingmode();
                      putDReg(fd, triop(Iop_SubF64, rm, getDReg(fs),
                                        getDReg(ft)));
                      break;
                   }
-               case 0x10:  /* S */
-                  {
+                  case 0x10: {  /* S */
                      DIP("sub.s f%d, f%d, f%d", fd, fs, ft);
+                     calculateFCSR(fs, ft, SUBS, True, 2);
                      IRExpr *rm = get_IR_roundingmode();
                      putFReg(fd, mkWidenFromF32(tyF, triop(Iop_SubF32, rm,
                                  getLoFromF64(tyF, getFReg(fs)),
                                  getLoFromF64(tyF, getFReg(ft)))));
                      break;
                   }
-               default:
-                  goto decode_failure;
+                  default:
+                     goto decode_failure;
                }
                break;  /* SUB.fmt */
 
@@ -11934,7 +11974,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
                switch (fmt) {
                   case 0x10:  /* S */
                      DIP("round.l.s f%d, f%d", fd, fs);
-                     calculateFCSR(fs, ROUNDLS, True);
+                     calculateFCSR(fs, 0, ROUNDLS, True, 1);
                      t0 = newTemp(Ity_I64);
 
                      assign(t0, binop(Iop_F32toI64S, mkU32(0x0),
@@ -11944,7 +11984,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
                   break;
                   case 0x11:  /* D */
                      DIP("round.l.d f%d, f%d", fd, fs);
-                     calculateFCSR(fs, ROUNDLD, False);
+                     calculateFCSR(fs, 0, ROUNDLD, False, 1);
                      putFReg(fd, binop(Iop_RoundF64toInt, mkU32(0x0),
                                        getFReg(fs)));
                      break;
@@ -11958,7 +11998,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
                switch (fmt) {
                   case 0x10:  /* S */
                      DIP("trunc.l.s f%d, f%d", fd, fs);
-                     calculateFCSR(fs, TRUNCLS, True);
+                     calculateFCSR(fs, 0, TRUNCLS, True, 1);
                      t0 = newTemp(Ity_I64);
                      assign(t0, binop(Iop_F32toI64S, mkU32(0x3),
                                       getLoFromF64(Ity_F64, getFReg(fs))));
@@ -11967,7 +12007,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
                      break;
                   case 0x11:  /* D */
                      DIP("trunc.l.d f%d, f%d", fd, fs);
-                     calculateFCSR(fs, TRUNCLD, False);
+                     calculateFCSR(fs, 0, TRUNCLD, False, 1);
                      putFReg(fd, binop(Iop_RoundF64toInt, mkU32(0x3),
                                        getFReg(fs)));
                      break;
@@ -12275,17 +12315,18 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
 
             case 0x0:  /* add.fmt */
                switch (fmt) {
-               case 0x10:  /* S */
-                  {
-                     DIP("add.s f%d, f%d, f%d", fd, fs, ft);
-                     IRExpr *rm = get_IR_roundingmode();
-                     putFReg(fd, mkWidenFromF32(tyF, triop(Iop_AddF32, rm,
-                                 getLoFromF64(tyF, getFReg(fs)),
-                                 getLoFromF64(tyF, getFReg(ft)))));
-                     break;
-                  }
+               case 0x10: {  /* S */
+                  DIP("add.s f%d, f%d, f%d", fd, fs, ft);
+                  calculateFCSR(fs, ft, ADDS, True, 2);
+                  IRExpr *rm = get_IR_roundingmode();
+                  putFReg(fd, mkWidenFromF32(tyF, triop(Iop_AddF32, rm,
+                              getLoFromF64(tyF, getFReg(fs)),
+                              getLoFromF64(tyF, getFReg(ft)))));
+                  break;
+               }
                case 0x11: {  /* D */
                   DIP("add.d f%d, f%d, f%d", fd, fs, ft);
+                  calculateFCSR(fs, ft, ADDD, False, 2);
                   IRExpr *rm = get_IR_roundingmode();
                   putDReg(fd, triop(Iop_AddF64, rm, getDReg(fs), getDReg(ft)));
                   break;
@@ -12435,7 +12476,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
                switch (fmt) {
                   case 0x10:  /* S */
                      DIP("cvt.d.s f%d, f%d", fd, fs);
-                     calculateFCSR(fs, CVTDS, True);
+                     calculateFCSR(fs, 0, CVTDS, True, 1);
                      if (mode64) {
                         t0 = newTemp(Ity_I64);
                         t1 = newTemp(Ity_I32);
@@ -12455,7 +12496,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
 
                   case 0x14:
                      DIP("cvt.d.w %d, %d", fd, fs);
-                     calculateFCSR(fs, CVTDW, True);
+                     calculateFCSR(fs, 0, CVTDW, True, 1);
                      if (mode64) {
                         t0 = newTemp(Ity_I64);
                         t1 = newTemp(Ity_I32);
@@ -12477,7 +12518,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
                   case 0x15: {  /* L */
                      if (mode64) {
                         DIP("cvt.d.l %d, %d", fd, fs);
-                        calculateFCSR(fs, CVTDL, False);
+                        calculateFCSR(fs, 0, CVTDL, False, 1);
                         t0 = newTemp(Ity_I64);
                         assign(t0, unop(Iop_ReinterpF64asI64, getFReg(fs)));
 
@@ -12496,7 +12537,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
                switch (fmt) {
                   case 0x14:  /* W */
                      DIP("cvt.s.w %d, %d", fd, fs);
-                     calculateFCSR(fs, CVTSW, True);
+                     calculateFCSR(fs, 0, CVTSW, True, 1);
                      if (mode64) {
                         t0 = newTemp(Ity_I64);
                         t1 = newTemp(Ity_I32);
@@ -12518,7 +12559,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
 
                   case 0x11:  /* D */
                      DIP("cvt.s.d %d, %d", fd, fs);
-                     calculateFCSR(fs, CVTSD, False);
+                     calculateFCSR(fs, 0, CVTSD, False, 1);
                      if (mode64) {
                         t0 = newTemp(Ity_F32);
                         assign(t0, binop(Iop_F64toF32, get_IR_roundingmode(),
@@ -12531,7 +12572,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
 
                   case 0x15:  /* L */
                      DIP("cvt.s.l %d, %d", fd, fs);
-                     calculateFCSR(fs, CVTSL, False);
+                     calculateFCSR(fs, 0, CVTSL, False, 1);
                      t0 = newTemp(Ity_I64);
                      assign(t0, unop(Iop_ReinterpF64asI64, getFReg(fs)));
 
@@ -12548,7 +12589,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
                switch (fmt) {
                case 0x10:  /* S */
                   DIP("cvt.w.s %d, %d", fd, fs);
-                  calculateFCSR(fs, CVTWS, True);
+                  calculateFCSR(fs, 0, CVTWS, True, 1);
                   if (mode64) {
                      putFReg(fd, mkWidenFromF32(tyF, binop(Iop_RoundF32toInt,
                              get_IR_roundingmode(), getLoFromF64(tyF,
@@ -12560,7 +12601,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
 
                case 0x11:
                   DIP("cvt.w.d %d, %d", fd, fs);
-                  calculateFCSR(fs, CVTWD, False);
+                  calculateFCSR(fs, 0, CVTWD, False, 1);
                   if (mode64) {
                      t0 = newTemp(Ity_I32);
                      t1 = newTemp(Ity_F32);
@@ -12588,7 +12629,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
                switch (fmt) {
                   case 0x10:  /* S */
                      DIP("cvt.l.s %d, %d", fd, fs);
-                     calculateFCSR(fs, CVTLS, True);
+                     calculateFCSR(fs, 0, CVTLS, True, 1);
                      t0 = newTemp(Ity_I64);
 
                      assign(t0, binop(Iop_F32toI64S, get_IR_roundingmode(),
@@ -12599,7 +12640,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
 
                   case 0x11: {  /* D */
                      DIP("cvt.l.d %d, %d", fd, fs);
-                     calculateFCSR(fs, CVTLD, False);
+                     calculateFCSR(fs, 0, CVTLD, False, 1);
                      putFReg(fd, binop(Iop_RoundF64toInt,
                              get_IR_roundingmode(), getFReg(fs)));
                      break;
@@ -12614,7 +12655,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
                switch (fmt) {
                   case 0x10:  /* S */
                      DIP("floor.l.s %d, %d", fd, fs);
-                     calculateFCSR(fs, FLOORLS, True);
+                     calculateFCSR(fs, 0, FLOORLS, True, 1);
                      t0 = newTemp(Ity_I64);
 
                      assign(t0, binop(Iop_F32toI64S, mkU32(0x1),
@@ -12625,7 +12666,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
 
                   case 0x11:  /* D */
                      DIP("floor.l.d %d, %d", fd, fs);
-                     calculateFCSR(fs, FLOORLD, False);
+                     calculateFCSR(fs, 0, FLOORLD, False, 1);
                      putFReg(fd, binop(Iop_RoundF64toInt, mkU32(0x1),
                                        getFReg(fs)));
                      break;
@@ -12638,7 +12679,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
                switch (fmt) {
                   case 0x10:  /* S */
                      DIP("round.w.s f%d, f%d", fd, fs);
-                     calculateFCSR(fs, ROUNDWS, True);
+                     calculateFCSR(fs, 0, ROUNDWS, True, 1);
                      if (mode64) {
                         t0 = newTemp(Ity_I64);
                         t1 = newTemp(Ity_I32);
@@ -12662,7 +12703,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
 
                   case 0x11:  /* D */
                      DIP("round.w.d f%d, f%d", fd, fs);
-                     calculateFCSR(fs, ROUNDWD, False);
+                     calculateFCSR(fs, 0, ROUNDWD, False, 1);
                      if (mode64) {
                         t0 = newTemp(Ity_I32);
                         assign(t0, binop(Iop_F64toI32S, mkU32(0x0),
@@ -12688,7 +12729,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
                switch (fmt) {
                   case 0x10:  /* S */
                      DIP("floor.w.s f%d, f%d", fd, fs);
-                     calculateFCSR(fs, FLOORWS, True);
+                     calculateFCSR(fs, 0, FLOORWS, True, 1);
                      if (mode64) {
                         t0 = newTemp(Ity_I64);
                         t1 = newTemp(Ity_I32);
@@ -12712,7 +12753,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
 
                   case 0x11:  /* D */
                      DIP("floor.w.d f%d, f%d", fd, fs);
-                     calculateFCSR(fs, FLOORWD, False);
+                     calculateFCSR(fs, 0, FLOORWD, False, 1);
                      if (mode64) {
                         t0 = newTemp(Ity_I32);
                         assign(t0, binop(Iop_F64toI32S, mkU32(0x1),
@@ -12739,7 +12780,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
                switch (fmt) {
                   case 0x10:  /* S */
                      DIP("trunc.w.s %d, %d", fd, fs);
-                     calculateFCSR(fs, TRUNCWS, True);
+                     calculateFCSR(fs, 0, TRUNCWS, True, 1);
                      if (mode64) {
                         t0 = newTemp(Ity_I64);
                         t1 = newTemp(Ity_I32);
@@ -12762,7 +12803,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
                      break;
                   case 0x11:  /* D */
                      DIP("trunc.w.d %d, %d", fd, fs);
-                     calculateFCSR(fs, TRUNCWD, False);
+                     calculateFCSR(fs, 0, TRUNCWD, False, 1);
                      if (mode64) {
                         t0 = newTemp(Ity_I32);
 
@@ -12790,7 +12831,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
                switch (fmt) {
                   case 0x10:  /* S */
                      DIP("ceil.w.s %d, %d", fd, fs);
-                     calculateFCSR(fs, CEILWS, True);
+                     calculateFCSR(fs, 0, CEILWS, True, 1);
                      if (mode64) {
                         t0 = newTemp(Ity_I64);
                         t1 = newTemp(Ity_I32);
@@ -12814,7 +12855,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
 
                   case 0x11:  /* D */
                      DIP("ceil.w.d %d, %d", fd, fs);
-                     calculateFCSR(fs, CEILWD, False);
+                     calculateFCSR(fs, 0, CEILWD, False, 1);
                      if (!mode64) {
                         t0 = newTemp(Ity_I32);
                         assign(t0, binop(Iop_F64toI32S, mkU32(0x2),
@@ -12838,7 +12879,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
                switch (fmt) {
                   case 0x10:  /* S */
                      DIP("ceil.l.s %d, %d", fd, fs);
-                     calculateFCSR(fs, CEILLS, True);
+                     calculateFCSR(fs, 0, CEILLS, True, 1);
                      t0 = newTemp(Ity_I64);
 
                      assign(t0, binop(Iop_F32toI64S, mkU32(0x2),
@@ -12849,7 +12890,7 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
 
                   case 0x11:  /* D */
                      DIP("ceil.l.d %d, %d", fd, fs);
-                     calculateFCSR(fs, CEILLD, False);
+                     calculateFCSR(fs, 0, CEILLD, False, 1);
                      putFReg(fd, binop(Iop_RoundF64toInt, mkU32(0x2),
                                        getFReg(fs)));
                      break;
diff --git a/VEX/priv/host_mips_isel.c b/VEX/priv/host_mips_isel.c
index d0017c8726..346e41120e 100644
--- a/VEX/priv/host_mips_isel.c
+++ b/VEX/priv/host_mips_isel.c
@@ -3226,7 +3226,6 @@ static HReg iselFltExpr_wrk(ISelEnv * env, IRExpr * e)
 
          case Iop_SqrtF32:
          case Iop_SqrtF64: {
-            /* first arg is rounding mode; we ignore it. */
             Bool sz32 = e->Iex.Binop.op == Iop_SqrtF32;
             HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
             HReg dst = newVRegF(env);
@@ -3455,10 +3454,11 @@ static HReg iselDblExpr_wrk(ISelEnv * env, IRExpr * e)
          }
 
          case Iop_SqrtF64: {
-            /* first arg is rounding mode; we ignore it. */
             HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
             HReg dst = newVRegD(env);
+            set_MIPS_rounding_mode(env, e->Iex.Binop.arg1);
             addInstr(env, MIPSInstr_FpUnary(Mfp_SQRTD, dst, src));
+            set_MIPS_rounding_default(env);
             return dst;
          }
 
@@ -3483,6 +3483,9 @@ static HReg iselDblExpr_wrk(ISelEnv * env, IRExpr * e)
                case Iop_DivF64:
                   op = Mfp_DIVD;
                   break;
+               case Iop_DivF32:
+                  op = Mfp_DIVS;
+                  break;
                case Iop_MulF64:
                   op = Mfp_MULD;
                   break;
@@ -3495,7 +3498,9 @@ static HReg iselDblExpr_wrk(ISelEnv * env, IRExpr * e)
                default:
                   vassert(0);
             }
+            set_MIPS_rounding_mode(env, e->Iex.Triop.details->arg1);
             addInstr(env, MIPSInstr_FpBinary(op, dst, argL, argR));
+            set_MIPS_rounding_default(env);
             return dst;
          }
          default: