]> git.ipfire.org Git - thirdparty/valgrind.git/commitdiff
mips: MSA support for mips32/mips64.
authorPetar Jovanovic <mips32r2@gmail.com>
Tue, 24 Oct 2017 16:00:28 +0000 (18:00 +0200)
committerPetar Jovanovic <mips32r2@gmail.com>
Fri, 27 Oct 2017 14:27:24 +0000 (16:27 +0200)
Full support of MIPS SIMD Architecture Module (MSA) instruction set.

Following IOPs have been implemented using generation of MSA instructions:

  Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4, Iop_GetElem64x2,
  Iop_V128to32, Iop_V128HIto64, Iop_V128to64, Iop_F32toF16x4, Iop_Abs64x2,
  Iop_Abs32x4, Iop_Abs16x8, Iop_Abs8x16, Iop_Cnt8x16, Iop_NotV128,
  Iop_Reverse8sIn16_x8, Iop_Reverse8sIn32_x4, Iop_Reverse8sIn64_x2,
  Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4, Iop_Clz8x16, Iop_Clz16x8,
  Iop_Clz32x4, Iop_Clz64x2, Iop_Abs32Fx4, Iop_Abs64Fx2, Iop_RecipEst32Fx4,
  Iop_RecipEst64Fx2, Iop_RSqrtEst32Fx4, Iop_RSqrtEst64Fx2, Iop_F16toF32x4,
  Iop_I32UtoFx4, Iop_FtoI32Sx4_RZ, Iop_FtoI32Ux4_RZ, Iop_Add8x16,
  Iop_Add16x8, Iop_Add32x4, Iop_Add64x2, Iop_Sub8x16, Iop_Sub16x8,
  Iop_Sub32x4, Iop_Sub64x2, Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4,
  Iop_QAdd64Sx2, Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4,
  Iop_QAdd64Ux2, Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4,
  Iop_QSub64Sx2, Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4,
  Iop_QSub64Ux2, Iop_QDMulHi32Sx4, Iop_QDMulHi16Sx8, Iop_QRDMulHi32Sx4,
  Iop_QRDMulHi16Sx8, Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2,
  Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2, Iop_Min8Sx16,
  Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2, Iop_Min8Ux16, Iop_Min16Ux8,
  Iop_Min32Ux4, Iop_Min64Ux2, Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4,
  Iop_Shl64x2, Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2,
  Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2, Iop_InterleaveHI8x16,
  Iop_InterleaveHI16x8, Iop_InterleaveHI32x4, Iop_InterleaveHI64x2,
  Iop_InterleaveLO8x16, Iop_InterleaveLO16x8, Iop_InterleaveLO32x4,
  Iop_InterleaveLO64x2, Iop_InterleaveEvenLanes8x16,
  Iop_InterleaveEvenLanes16x8, Iop_InterleaveEvenLanes32x4,
  Iop_InterleaveOddLanes8x16, Iop_InterleaveOddLanes16x8,
  Iop_InterleaveOddLanes32x4, Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4,
  Iop_CmpEQ64x2, Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4,
  Iop_CmpGT64Sx2, Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4,
  Iop_CmpGT64Ux2, Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4, Iop_Avg8Ux16,
  Iop_Avg16Ux8, Iop_Avg32Ux4, Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4,
  Iop_AndV128, Iop_OrV128, Iop_XorV128, Iop_ShrV128, Iop_ShlV128,
  Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2, Iop_SarN8x16,
  Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2, Iop_ShrN8x16, Iop_ShrN16x8,
  Iop_ShrN32x4, Iop_ShrN64x2, Iop_QandQSarNnarrow64Sto32Sx2,
  Iop_QandQSarNnarrow32Sto16Sx4, Iop_QandQRSarNnarrow64Sto32Sx2,
  Iop_QandQRSarNnarrow32Sto16Sx4, Iop_CmpEQ32Fx4, Iop_CmpEQ64Fx2,
  Iop_CmpLT32Fx4, Iop_CmpLT64Fx2, Iop_CmpLE32Fx4, Iop_CmpLE64Fx2,
  Iop_CmpUN32Fx4, Iop_CmpUN64Fx2, Iop_64HLtoV128, Iop_Min32Fx4,
  Iop_Min64Fx2, Iop_Max32Fx4, Iop_Max64Fx2, Iop_Sqrt32Fx4,
  Iop_Sqrt64Fx2, Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4,
  Iop_Sub64Fx2, Iop_Mul32Fx4, Iop_Mul64Fx2, Iop_Div32Fx4,
  Iop_Div64Fx2, Iop_F32x4_2toQ16x8, Iop_F64x2_2toQ32x4,
  Iop_ScaleF64, Iop_Scale2_64Fx2, Iop_Scale2_32Fx4, Iop_Log2_32Fx4, Iop_Log2_64Fx2,
  Iop_PackOddLanes8x16, Iop_PackEvenLanes8x16, Iop_PackOddLanes16x8,
  Iop_PackEvenLanes16x8, Iop_PackOddLanes32x4, Iop_PackEvenLanes32x4.

Following IOPs have been implemented without generating MSA instructions:

  Iop_CmpEQ8, Iop_MullU8, Iop_MullS8, Iop_MullU16, Iop_MullS16, Iop_DivS32,
  Iop_DivU32, Iop_DivS64, Iop_DivU64, Iop_F32toI32U, Iop_F64toI64U,
  Iop_I64UtoF64

Implementation of the following IOPs has been changed in order to use MSA
when it is possible:

  Iop_MAddF64, Iop_MSubF32, Iop_MSubF64.

Contributed by:
  Tamara Vlahovic, Aleksandar Rikalo and Aleksandra Karadzic.

Related BZ issue - #382563.

VEX/priv/guest_mips_defs.h
VEX/priv/guest_mips_helpers.c
VEX/priv/guest_mips_toIR.c
VEX/priv/host_mips_defs.c
VEX/priv/host_mips_defs.h
VEX/priv/host_mips_isel.c
VEX/priv/main_main.c
VEX/pub/libvex.h
VEX/pub/libvex_guest_mips32.h
VEX/pub/libvex_guest_mips64.h

index 5ea213d2223ee38abd3aea4b8c192db89077bd1d..6ee6728d4c57b7353730a3a3bddaf1724327e3f5 100644 (file)
@@ -94,6 +94,20 @@ typedef enum {
    SUBS,     SUBD,    DIVS
 } flt_op;
 
+/* Selector for the MSA floating-point instruction that
+   mips_dirtyhelper_calculate_MSACSR executes.  The trailing letter picks the
+   instruction's data format suffix in that helper, e.g. FADDW -> fadd.w,
+   FADDD -> fadd.d, FEXDOH -> fexdo.h.
+   The enumerators are passed to the helper as plain integer constants, so
+   their order (starting at FADDW = 0) must not be changed. */
+typedef enum {
+   FADDW=0, FADDD, FSUBW, FSUBD, FMULW, FMULD, FDIVW, FDIVD, FMADDW, FMADDD,
+   FCAFD, FCAFW, FSAFD, FSAFW, FCEQD, FCEQW, FSEQD, FSEQW, FCLTD, FCLTW, FSLTD,
+   FSLTW, FCLED, FCLEW, FSLED, FSLEW, FCNED, FCNEW, FSNED, FSNEW, FCUND, FCUNW,
+   FSUND, FSUNW, FCORD, FCORW, FSORD, FSORW, FCUEQD, FCUEQW, FSUEQD, FSUEQW,
+   FCUNED, FCUNEW, FSUNED, FSUNEW, FCULED, FCULEW, FSULED, FSULEW, FCULTD,
+   FCULTW, FSULTD, FSULTW, FEXP2W, FEXP2D, FMINW, FMIND, FMINAW, FMINAD, FMAXW,
+   FMAXD, FMAXAW, FMAXAD, FFINTSW, FFINTSD, FRCPW, FRCPD, FRSQRTW, FRSQRTD,
+   FSQRTW, FSQRTD, FRINTW, FRINTD, FTRUNCUW, FTRUNCUD, FTRUNCSW, FTRUNCSD,
+   FEXDOH, FEXDOW, FEXUPRD, FEXUPRW, FEXUPLD, FEXUPLW, FLOG2W, FLOG2D,
+   FTQH, FTQW, FFQRW, FFQRD,FFQLW, FFQLD, FTINT_SW, FTINT_SD,
+   FTINT_UW, FTINT_UD, FFINT_UW, FFINT_UD,
+} msa_flt_op;
+
 #if defined (_MIPSEL)
    #define MIPS_IEND Iend_LE
 #else
@@ -109,6 +123,11 @@ extern UInt mips_dirtyhelper_calculate_FCSR_fp32 ( void* guest_state, UInt fs,
 extern UInt mips_dirtyhelper_calculate_FCSR_fp64 ( void* guest_state, UInt fs,
                                                    UInt ft, flt_op op );
 
+/* Dirty helper: runs the MSA floating-point instruction selected by 'inst'
+   on the guest-state copies of vector registers ws and wt (gs points to the
+   guest state) and returns the MSA control register value read back after
+   the instruction.  Returns 0 when built without MSA support. */
+extern UInt mips_dirtyhelper_calculate_MSACSR ( void* gs, UInt ws, UInt wt,
+                                                msa_flt_op inst );
+/* Dirty helper: returns the host's MSA control register 0 (MSAIR), or 0 when
+   built without MSA support. */
+extern UInt mips_dirtyhelper_get_MSAIR ( void );
+
+
 /*---------------------------------------------------------*/
 /*---               Condition code stuff                ---*/
 /*---------------------------------------------------------*/
index 00a92c3ac73bc6ece73305ca3d54d3803cabc421..3f21512a823eb66b8794ece0f654165dc00bad67 100644 (file)
 #include "guest_generic_bb_to_IR.h"
 #include "guest_mips_defs.h"
 
+/* Compiler version encoded as major * 100 + minor (e.g. 408 for GCC 4.8);
+   0 when the compiler does not define __GNUC__. */
+#ifdef __GNUC__
+#  define GCC_VERSION ((__GNUC__) * 100 + (__GNUC_MINOR__))
+#else
+#  define GCC_VERSION 0
+#endif
+
 /* This file contains helper functions for mips guest code.  Calls to
    these functions are generated by the back end.
 */
@@ -176,6 +182,13 @@ void LibVEX_GuestMIPS32_initialise( /*OUT*/ VexGuestMIPS32State * vex_state)
    vex_state->guest_ac1 = 0;          /* Accumulator 1 */
    vex_state->guest_ac2 = 0;          /* Accumulator 2 */
    vex_state->guest_ac3 = 0;          /* Accumulator 3 */
+
+   vex_state->guest_w0.w64[0] = 0;
+   vex_state->guest_w0.w64[1] = 0;
+   vex_state->guest_w1.w64[0] = 0;
+   vex_state->guest_w1.w64[1] = 0;
+   vex_state->guest_w2.w64[0] = 0;
+   vex_state->guest_w2.w64[1] = 0;
 }
 
 void LibVEX_GuestMIPS64_initialise ( /*OUT*/ VexGuestMIPS64State * vex_state )
@@ -282,6 +295,8 @@ void LibVEX_GuestMIPS64_initialise ( /*OUT*/ VexGuestMIPS64State * vex_state )
 
    vex_state->guest_LLaddr = 0xFFFFFFFFFFFFFFFFULL;
    vex_state->guest_LLdata = 0;
+
+   vex_state->guest_MSACSR = 0;
 }
 
 /*-----------------------------------------------------------*/
@@ -511,6 +526,25 @@ HWord mips_dirtyhelper_rdhwr ( UInt rd )
                     : "t0", "$f24"                                  \
                    );
 
+/* Runs the one-operand MSA instruction 'inst' on the vector stored at
+   addr[ws] and leaves the resulting MSA control register ($1) value in 'ret'.
+   Sequence: save the host's control register in $t0, install 'msacsr', load
+   the operand into $w24, execute 'inst', read the control register back into
+   'ret', then restore the saved host value.
+   Expects 'ret', 'addr', 'ws' and 'msacsr' in the enclosing scope.
+   $w24 overlays FPU register $f24, so $f24 is declared clobbered (as the
+   FPU variants of this macro do); otherwise the compiler may keep a live
+   value in it across the asm. */
+#define ASM_VOLATILE_MSA_UNARY(inst)                                \
+   __asm__ volatile(".set  push"              "\n\t"                \
+                    ".set  mips32r2"          "\n\t"                \
+                    ".set  hardfloat"         "\n\t"                \
+                    ".set  fp=64"             "\n\t"                \
+                    ".set  msa"               "\n\t"                \
+                    ".set  noreorder"         "\n\t"                \
+                    "cfcmsa  $t0,  $1"        "\n\t"                \
+                    "ctcmsa  $1,   %2"        "\n\t"                \
+                    "ld.b  $w24, 0(%1)"       "\n\t"                \
+                    #inst" $w24, $w24"        "\n\t"                \
+                    "cfcmsa  %0,   $1"        "\n\t"                \
+                    "ctcmsa  $1,  $t0"        "\n\t"                \
+                    ".set  pop"               "\n\t"                \
+                    : "=r" (ret)                                    \
+                    : "r" (&(addr[ws])), "r" (msacsr)               \
+                    : "t0", "$f24"                                  \
+                   );
+
 #define ASM_VOLATILE_BINARY32(inst)                                 \
    __asm__ volatile(".set  push"              "\n\t"                \
                     ".set  hardfloat"         "\n\t"                \
@@ -559,6 +593,25 @@ HWord mips_dirtyhelper_rdhwr ( UInt rd )
                     : "t0", "$f24", "$f26"                              \
                    );
 
+/* Runs the two-operand MSA instruction 'inst' on the vectors stored at
+   addr[ws] and addr[wt] and leaves the resulting MSA control register ($1)
+   value in 'ret'.
+   Sequence: save the host's control register in $t0, install 'msacsr', load
+   the operands into $w24 and $w26, execute 'inst' (destination $w24), read
+   the control register back into 'ret', then restore the saved host value.
+   Expects 'ret', 'addr', 'ws', 'wt' and 'msacsr' in the enclosing scope.
+   $w24/$w26 overlay FPU registers $f24/$f26, so both are declared clobbered
+   (as the FPU variants of this macro do); otherwise the compiler may keep
+   live values in them across the asm. */
+#define ASM_VOLATILE_MSA_BINARY(inst)                                   \
+   __asm__ volatile(".set  push"              "\n\t"                    \
+                    ".set  mips32r2"          "\n\t"                    \
+                    ".set  hardfloat"         "\n\t"                    \
+                    ".set  fp=64"             "\n\t"                    \
+                    ".set  msa"               "\n\t"                    \
+                    "cfcmsa $t0,  $1"         "\n\t"                    \
+                    "ctcmsa  $1,  %3"         "\n\t"                    \
+                    "ld.b  $w24, 0(%1)"       "\n\t"                    \
+                    "ld.b  $w26, 0(%2)"       "\n\t"                    \
+                    #inst" $w24, $w24, $w26"  "\n\t"                    \
+                    "cfcmsa %0,   $1"         "\n\t"                    \
+                    "ctcmsa $1,  $t0"         "\n\t"                    \
+                    ".set  pop"               "\n\t"                    \
+                    : "=r" (ret)                                        \
+                    : "r" (&(addr[ws])), "r" (&(addr[wt])), "r" (msacsr)\
+                    : "t0", "$f24", "$f26"                              \
+                   );
+
 /* TODO: Add cases for all fpu instructions because all fpu instructions
          change the value of FCSR register. */
 extern UInt mips_dirtyhelper_calculate_FCSR_fp32 ( void* gs, UInt fs, UInt ft,
@@ -767,6 +820,436 @@ extern UInt mips_dirtyhelper_calculate_FCSR_fp64 ( void* gs, UInt fs, UInt ft,
    return ret;
 }
 
+
+extern UInt mips_dirtyhelper_calculate_MSACSR ( void* gs, UInt ws, UInt wt,
+                                                msa_flt_op inst ) {
+   UInt ret = 0;
+/* GCC 4.8 and later support MIPS MSA. */
+#if defined(__mips__) && (defined(__clang__) || (GCC_VERSION >= 408))
+#if defined(VGA_mips32)
+   VexGuestMIPS32State* guest_state = (VexGuestMIPS32State*)gs;
+#else
+   VexGuestMIPS64State* guest_state = (VexGuestMIPS64State*)gs;
+#endif
+   V128 *addr = (V128 *)&guest_state->guest_w0;
+   UInt msacsr   = guest_state->guest_MSACSR;
+
+   switch (inst) {
+      case FADDW:
+         ASM_VOLATILE_MSA_BINARY(fadd.w)
+         break;
+
+      case FADDD:
+         ASM_VOLATILE_MSA_BINARY(fadd.d)
+         break;
+
+      case FSUBW:
+         ASM_VOLATILE_MSA_BINARY(fsub.w);
+         break;
+
+      case FSUBD:
+         ASM_VOLATILE_MSA_BINARY(fsub.d);
+         break;
+
+      case FMULW:
+         ASM_VOLATILE_MSA_BINARY(fmul.w);
+         break;
+
+      case FMULD:
+         ASM_VOLATILE_MSA_BINARY(fmul.d);
+         break;
+
+      case FDIVW:
+         ASM_VOLATILE_MSA_BINARY(fdiv.w);
+         break;
+
+      case FDIVD:
+         ASM_VOLATILE_MSA_BINARY(fdiv.d);
+         break;
+
+      case FMADDW:
+         ASM_VOLATILE_MSA_BINARY(fmadd.w);
+         break;
+
+      case FMADDD:
+         ASM_VOLATILE_MSA_BINARY(fmadd.d);
+         break;
+
+      case FCAFW:
+         ASM_VOLATILE_MSA_BINARY(fcaf.w);
+         break;
+
+      case FCAFD:
+         ASM_VOLATILE_MSA_BINARY(fcaf.d);
+         break;
+
+      case FSAFW:
+         ASM_VOLATILE_MSA_BINARY(fsaf.w);
+         break;
+
+      case FSAFD:
+         ASM_VOLATILE_MSA_BINARY(fsaf.d);
+         break;
+
+      case FCEQW:
+         ASM_VOLATILE_MSA_BINARY(fceq.w);
+         break;
+
+      case FCEQD:
+         ASM_VOLATILE_MSA_BINARY(fceq.d);
+         break;
+
+      case FSEQW:
+         ASM_VOLATILE_MSA_BINARY(fseq.w);
+         break;
+
+      case FSEQD:
+         ASM_VOLATILE_MSA_BINARY(fseq.d);
+         break;
+
+      case FCLTW:
+         ASM_VOLATILE_MSA_BINARY(fclt.w);
+         break;
+
+      case FCLTD:
+         ASM_VOLATILE_MSA_BINARY(fclt.d);
+         break;
+
+      case FSLTW:
+         ASM_VOLATILE_MSA_BINARY(fslt.w);
+         break;
+
+      case FSLTD:
+         ASM_VOLATILE_MSA_BINARY(fslt.d);
+         break;
+
+      case FCLEW:
+         ASM_VOLATILE_MSA_BINARY(fcle.w);
+         break;
+
+      case FCLED:
+         ASM_VOLATILE_MSA_BINARY(fcle.d);
+         break;
+
+      case FSLEW:
+         ASM_VOLATILE_MSA_BINARY(fsle.w);
+         break;
+
+      case FSLED:
+         ASM_VOLATILE_MSA_BINARY(fsle.d);
+         break;
+
+      case FCNEW:
+         ASM_VOLATILE_MSA_BINARY(fcne.w);
+         break;
+
+      case FCNED:
+         ASM_VOLATILE_MSA_BINARY(fcne.d);
+         break;
+
+      case FSNEW:
+         ASM_VOLATILE_MSA_BINARY(fsne.w);
+         break;
+
+      case FSNED:
+         ASM_VOLATILE_MSA_BINARY(fsne.d);
+         break;
+
+      case FEXP2W:
+         ASM_VOLATILE_MSA_BINARY(fexp2.w);
+         break;
+
+      case FEXP2D:
+         ASM_VOLATILE_MSA_BINARY(fexp2.d);
+         break;
+
+      case FMINW:
+         ASM_VOLATILE_MSA_BINARY(fmin.w);
+         break;
+
+      case FMIND:
+         ASM_VOLATILE_MSA_BINARY(fmin.d);
+         break;
+
+      case FMINAW:
+         ASM_VOLATILE_MSA_BINARY(fmin_a.w);
+         break;
+
+      case FMINAD:
+         ASM_VOLATILE_MSA_BINARY(fmin_a.d);
+         break;
+
+      case FCUNW:
+         ASM_VOLATILE_MSA_BINARY(fcun.w);
+         break;
+
+      case FCUND:
+         ASM_VOLATILE_MSA_BINARY(fcun.d);
+         break;
+
+      case FSUNW:
+         ASM_VOLATILE_MSA_BINARY(fsun.w);
+         break;
+
+      case FSUND:
+         ASM_VOLATILE_MSA_BINARY(fsun.d);
+         break;
+
+      case FCORW:
+         ASM_VOLATILE_MSA_BINARY(fcor.w);
+         break;
+
+      case FCORD:
+         ASM_VOLATILE_MSA_BINARY(fcor.d);
+         break;
+
+      case FSORW:
+         ASM_VOLATILE_MSA_BINARY(fsor.w);
+         break;
+
+      case FSORD:
+         ASM_VOLATILE_MSA_BINARY(fsor.d);
+         break;
+
+      case FCUEQW:
+         ASM_VOLATILE_MSA_BINARY(fcueq.w);
+         break;
+
+      case FCUEQD:
+         ASM_VOLATILE_MSA_BINARY(fcueq.d);
+         break;
+
+      case FSUEQW:
+         ASM_VOLATILE_MSA_BINARY(fsueq.w);
+         break;
+
+      case FSUEQD:
+         ASM_VOLATILE_MSA_BINARY(fsueq.d);
+         break;
+
+      case FCUNEW:
+         ASM_VOLATILE_MSA_BINARY(fcune.w);
+         break;
+
+      case FCUNED:
+         ASM_VOLATILE_MSA_BINARY(fcune.d);
+         break;
+
+      case FSUNEW:
+         ASM_VOLATILE_MSA_BINARY(fsune.w);
+         break;
+
+      case FSUNED:
+         ASM_VOLATILE_MSA_BINARY(fsune.d);
+         break;
+
+      case FCULEW:
+         ASM_VOLATILE_MSA_BINARY(fcule.w);
+         break;
+
+      case FCULED:
+         ASM_VOLATILE_MSA_BINARY(fcule.d);
+         break;
+
+      case FSULEW:
+         ASM_VOLATILE_MSA_BINARY(fsule.w);
+         break;
+
+      case FSULED:
+         ASM_VOLATILE_MSA_BINARY(fsule.d);
+         break;
+
+      case FCULTW:
+         ASM_VOLATILE_MSA_BINARY(fcult.w);
+         break;
+
+      case FCULTD:
+         ASM_VOLATILE_MSA_BINARY(fcult.d);
+         break;
+
+      case FSULTW:
+         ASM_VOLATILE_MSA_BINARY(fsult.w);
+         break;
+
+      case FSULTD:
+         ASM_VOLATILE_MSA_BINARY(fsult.d);
+
+      case FMAXW:
+         ASM_VOLATILE_MSA_BINARY(fmax.w);
+         break;
+
+      case FMAXD:
+         ASM_VOLATILE_MSA_BINARY(fmax.d);
+         break;
+
+      case FMAXAW:
+         ASM_VOLATILE_MSA_BINARY(fmax_a.w);
+         break;
+
+      case FMAXAD:
+         ASM_VOLATILE_MSA_BINARY(fmax_a.d);
+         break;
+
+      case FFINTSW:
+         ASM_VOLATILE_MSA_UNARY(ffint_s.w);
+         break;
+
+      case FFINTSD:
+         ASM_VOLATILE_MSA_UNARY(ffint_s.d);
+         break;
+
+      case FRCPW:
+         ASM_VOLATILE_MSA_UNARY(frcp.w);
+         break;
+
+      case FRCPD:
+         ASM_VOLATILE_MSA_UNARY(frcp.d);
+         break;
+
+      case FRSQRTW:
+         ASM_VOLATILE_MSA_UNARY(frsqrt.w);
+         break;
+
+      case FRSQRTD:
+         ASM_VOLATILE_MSA_UNARY(frsqrt.d);
+         break;
+
+      case FSQRTW:
+         ASM_VOLATILE_MSA_UNARY(fsqrt.w);
+         break;
+
+      case FSQRTD:
+         ASM_VOLATILE_MSA_UNARY(fsqrt.d);
+         break;
+
+      case FRINTW:
+         ASM_VOLATILE_MSA_UNARY(frint.w);
+         break;
+
+      case FRINTD:
+         ASM_VOLATILE_MSA_UNARY(frint.d);
+
+      case FTRUNCUW:
+         ASM_VOLATILE_MSA_UNARY(ftrunc_u.w);
+         break;
+
+      case FTRUNCUD:
+         ASM_VOLATILE_MSA_UNARY(ftrunc_u.d);
+         break;
+
+      case FTRUNCSW:
+         ASM_VOLATILE_MSA_UNARY(ftrunc_s.w);
+         break;
+
+      case FTRUNCSD:
+         ASM_VOLATILE_MSA_UNARY(ftrunc_s.d);
+         break;
+
+      case FEXDOH:
+         ASM_VOLATILE_MSA_BINARY(fexdo.h);
+         break;
+
+      case FEXDOW:
+         ASM_VOLATILE_MSA_BINARY(fexdo.w);
+         break;
+
+      case FEXUPRW:
+         ASM_VOLATILE_MSA_UNARY(fexupr.w);
+         break;
+
+      case FEXUPRD:
+         ASM_VOLATILE_MSA_UNARY(fexupr.d);
+         break;
+
+      case FEXUPLW:
+         ASM_VOLATILE_MSA_UNARY(fexupl.w);
+         break;
+
+      case FEXUPLD:
+         ASM_VOLATILE_MSA_UNARY(fexupl.d);
+         break;
+
+      case FTQH:
+         ASM_VOLATILE_MSA_BINARY(ftq.h);
+         break;
+
+      case FTQW:
+         ASM_VOLATILE_MSA_BINARY(ftq.w);
+         break;
+
+      case FFQRD:
+         ASM_VOLATILE_MSA_UNARY(ffqr.d);
+         break;
+
+      case FFQRW:
+         ASM_VOLATILE_MSA_UNARY(ffqr.w);
+         break;
+
+      case FFQLD:
+         ASM_VOLATILE_MSA_UNARY(ffql.d);
+         break;
+
+      case FFQLW:
+         ASM_VOLATILE_MSA_UNARY(ffql.w);
+         break;
+
+      case FTINT_SD:
+         ASM_VOLATILE_MSA_UNARY(ftint_s.d);
+         break;
+
+      case FTINT_SW:
+         ASM_VOLATILE_MSA_UNARY(ftint_s.w);
+         break;
+
+      case FTINT_UD:
+         ASM_VOLATILE_MSA_UNARY(ftint_u.d);
+         break;
+
+      case FTINT_UW:
+         ASM_VOLATILE_MSA_UNARY(ftint_u.w);
+         break;
+
+      case FLOG2D:
+         ASM_VOLATILE_MSA_UNARY(flog2.d);
+         break;
+
+      case FLOG2W:
+         ASM_VOLATILE_MSA_UNARY(flog2.w);
+         break;
+
+      case FFINT_UD:
+         ASM_VOLATILE_MSA_UNARY(ffint_u.d);
+         break;
+
+      case FFINT_UW:
+         ASM_VOLATILE_MSA_UNARY(ffint_u.w);
+         break;
+   }
+
+#endif
+   return ret;
+}
+
+/* Dirty helper: reads MSA control register 0 (MSAIR) of the host with
+   cfcmsa and returns it.  Returns 0 when not built for a MIPS host with a
+   compiler that supports MSA.  Defined with a '( void )' parameter list so
+   the definition is a prototype matching the declaration in
+   guest_mips_defs.h. */
+extern UInt mips_dirtyhelper_get_MSAIR ( void ) {
+   UInt ret = 0;
+/* GCC 4.8 and later support MIPS MSA. */
+#if defined(__mips__) && (defined(__clang__) || (GCC_VERSION >= 408))
+   __asm__ volatile(".set push        \n\t"
+                    ".set mips32r2    \n\t"
+                    ".set hardfloat   \n\t"
+                    ".set fp=64       \n\t"
+                    ".set msa         \n\t"
+                    ".set noreorder   \n\t"
+                    "cfcmsa  %0, $0   \n\t"
+                    ".set pop         \n\t"
+                    : "=r" (ret) : : );
+#endif
+   return ret;
+}
+
+
+
+
 /*---------------------------------------------------------------*/
 /*--- end                                guest_mips_helpers.c ---*/
 /*---------------------------------------------------------------*/
index d5215f1043740ec657bb1c54a448cdcc8f1ebae0..0063ae3026a65010f243f6db1c7ba065c767f1b1 100644 (file)
@@ -75,6 +75,9 @@ static Bool mode64 = False;
 /* CPU has FPU and 32 dbl. prec. FP registers. */
 static Bool fp_mode64 = False;
 
+/* True when the CPU has an MSA (MIPS SIMD Architecture) unit; False until
+   set otherwise. */
+static Bool has_msa = False;
+
 /* Define 1.0 in single and double precision. */
 #define ONE_SINGLE 0x3F800000
 #define ONE_DOUBLE 0x3FF0000000000000ULL
@@ -421,6 +424,286 @@ static UInt accumulatorGuestRegOffset(UInt acNo)
    return ret;
 }
 
+/* ---------------- MIPS32 MSA registers ---------------- */
+
+static UInt msaGuestRegOffset(UInt msaRegNo) {
+   vassert(msaRegNo <= 31);
+   UInt ret;
+
+   if (mode64) {
+      switch (msaRegNo) {
+         case 0:
+            ret = offsetof(VexGuestMIPS64State, guest_w0);
+            break;
+
+         case 1:
+            ret = offsetof(VexGuestMIPS64State, guest_w1);
+            break;
+
+         case 2:
+            ret = offsetof(VexGuestMIPS64State, guest_w2);
+            break;
+
+         case 3:
+            ret = offsetof(VexGuestMIPS64State, guest_w3);
+            break;
+
+         case 4:
+            ret = offsetof(VexGuestMIPS64State, guest_w4);
+            break;
+
+         case 5:
+            ret = offsetof(VexGuestMIPS64State, guest_w5);
+            break;
+
+         case 6:
+            ret = offsetof(VexGuestMIPS64State, guest_w6);
+            break;
+
+         case 7:
+            ret = offsetof(VexGuestMIPS64State, guest_w7);
+            break;
+
+         case 8:
+            ret = offsetof(VexGuestMIPS64State, guest_w8);
+            break;
+
+         case 9:
+            ret = offsetof(VexGuestMIPS64State, guest_w9);
+            break;
+
+         case 10:
+            ret = offsetof(VexGuestMIPS64State, guest_w10);
+            break;
+
+         case 11:
+            ret = offsetof(VexGuestMIPS64State, guest_w11);
+            break;
+
+         case 12:
+            ret = offsetof(VexGuestMIPS64State, guest_w12);
+            break;
+
+         case 13:
+            ret = offsetof(VexGuestMIPS64State, guest_w13);
+            break;
+
+         case 14:
+            ret = offsetof(VexGuestMIPS64State, guest_w14);
+            break;
+
+         case 15:
+            ret = offsetof(VexGuestMIPS64State, guest_w15);
+            break;
+
+         case 16:
+            ret = offsetof(VexGuestMIPS64State, guest_w16);
+            break;
+
+         case 17:
+            ret = offsetof(VexGuestMIPS64State, guest_w17);
+            break;
+
+         case 18:
+            ret = offsetof(VexGuestMIPS64State, guest_w18);
+            break;
+
+         case 19:
+            ret = offsetof(VexGuestMIPS64State, guest_w19);
+            break;
+
+         case 20:
+            ret = offsetof(VexGuestMIPS64State, guest_w20);
+            break;
+
+         case 21:
+            ret = offsetof(VexGuestMIPS64State, guest_w21);
+            break;
+
+         case 22:
+            ret = offsetof(VexGuestMIPS64State, guest_w22);
+            break;
+
+         case 23:
+            ret = offsetof(VexGuestMIPS64State, guest_w23);
+            break;
+
+         case 24:
+            ret = offsetof(VexGuestMIPS64State, guest_w24);
+            break;
+
+         case 25:
+            ret = offsetof(VexGuestMIPS64State, guest_w25);
+            break;
+
+         case 26:
+            ret = offsetof(VexGuestMIPS64State, guest_w26);
+            break;
+
+         case 27:
+            ret = offsetof(VexGuestMIPS64State, guest_w27);
+            break;
+
+         case 28:
+            ret = offsetof(VexGuestMIPS64State, guest_w28);
+            break;
+
+         case 29:
+            ret = offsetof(VexGuestMIPS64State, guest_w29);
+            break;
+
+         case 30:
+            ret = offsetof(VexGuestMIPS64State, guest_w30);
+            break;
+
+         case 31:
+            ret = offsetof(VexGuestMIPS64State, guest_w31);
+            break;
+
+         default:
+            vassert(0);
+            break;
+      }
+   } else {
+      switch (msaRegNo) {
+         case 0:
+            ret = offsetof(VexGuestMIPS32State, guest_w0);
+            break;
+
+         case 1:
+            ret = offsetof(VexGuestMIPS32State, guest_w1);
+            break;
+
+         case 2:
+            ret = offsetof(VexGuestMIPS32State, guest_w2);
+            break;
+
+         case 3:
+            ret = offsetof(VexGuestMIPS32State, guest_w3);
+            break;
+
+         case 4:
+            ret = offsetof(VexGuestMIPS32State, guest_w4);
+            break;
+
+         case 5:
+            ret = offsetof(VexGuestMIPS32State, guest_w5);
+            break;
+
+         case 6:
+            ret = offsetof(VexGuestMIPS32State, guest_w6);
+            break;
+
+         case 7:
+            ret = offsetof(VexGuestMIPS32State, guest_w7);
+            break;
+
+         case 8:
+            ret = offsetof(VexGuestMIPS32State, guest_w8);
+            break;
+
+         case 9:
+            ret = offsetof(VexGuestMIPS32State, guest_w9);
+            break;
+
+         case 10:
+            ret = offsetof(VexGuestMIPS32State, guest_w10);
+            break;
+
+         case 11:
+            ret = offsetof(VexGuestMIPS32State, guest_w11);
+            break;
+
+         case 12:
+            ret = offsetof(VexGuestMIPS32State, guest_w12);
+            break;
+
+         case 13:
+            ret = offsetof(VexGuestMIPS32State, guest_w13);
+            break;
+
+         case 14:
+            ret = offsetof(VexGuestMIPS32State, guest_w14);
+            break;
+
+         case 15:
+            ret = offsetof(VexGuestMIPS32State, guest_w15);
+            break;
+
+         case 16:
+            ret = offsetof(VexGuestMIPS32State, guest_w16);
+            break;
+
+         case 17:
+            ret = offsetof(VexGuestMIPS32State, guest_w17);
+            break;
+
+         case 18:
+            ret = offsetof(VexGuestMIPS32State, guest_w18);
+            break;
+
+         case 19:
+            ret = offsetof(VexGuestMIPS32State, guest_w19);
+            break;
+
+         case 20:
+            ret = offsetof(VexGuestMIPS32State, guest_w20);
+            break;
+
+         case 21:
+            ret = offsetof(VexGuestMIPS32State, guest_w21);
+            break;
+
+         case 22:
+            ret = offsetof(VexGuestMIPS32State, guest_w22);
+            break;
+
+         case 23:
+            ret = offsetof(VexGuestMIPS32State, guest_w23);
+            break;
+
+         case 24:
+            ret = offsetof(VexGuestMIPS32State, guest_w24);
+            break;
+
+         case 25:
+            ret = offsetof(VexGuestMIPS32State, guest_w25);
+            break;
+
+         case 26:
+            ret = offsetof(VexGuestMIPS32State, guest_w26);
+            break;
+
+         case 27:
+            ret = offsetof(VexGuestMIPS32State, guest_w27);
+            break;
+
+         case 28:
+            ret = offsetof(VexGuestMIPS32State, guest_w28);
+            break;
+
+         case 29:
+            ret = offsetof(VexGuestMIPS32State, guest_w29);
+            break;
+
+         case 30:
+            ret = offsetof(VexGuestMIPS32State, guest_w30);
+            break;
+
+         case 31:
+            ret = offsetof(VexGuestMIPS32State, guest_w31);
+            break;
+
+         default:
+            vassert(0);
+            break;
+      }
+   }
+
+   return ret;
+}
+
+
 /* Do a endian load of a 32-bit word, regardless of the endianness of the
    underlying host. */
 static inline UInt getUInt(const UChar * p)
@@ -469,6 +752,15 @@ static inline UInt getUInt(const UChar * p)
          assign(t1, binop(Iop_Add64, getIReg(rs), \
                                      mkU64(extend_s_16to64(imm)))); \
 
+/* Address computation for MSA loads/stores: allocates t1 (I64 in 64-bit
+   mode, I32 otherwise) and assigns it getIReg(ws) plus the 10-bit immediate
+   'imm' after it has been passed through extend_s_10to32/extend_s_10to64.
+   Expects 't1' and 'ws' to be declared at the expansion site.
+   The last line ends in a line continuation, so the line following the
+   macro must stay blank. */
+#define LOAD_STORE_PATTERN_MSA(imm) \
+   t1 = newTemp(mode64 ? Ity_I64 : Ity_I32); \
+      if (!mode64) \
+         assign(t1, binop(Iop_Add32, getIReg(ws),  \
+                                     mkU32(extend_s_10to32(imm)))); \
+      else \
+         assign(t1, binop(Iop_Add64, getIReg(ws), \
+                                     mkU64(extend_s_10to64(imm)))); \
+
 #define LOADX_STORE_PATTERN \
    t1 = newTemp(mode64 ? Ity_I64 : Ity_I32); \
       if(!mode64) \
@@ -765,11 +1057,25 @@ static Bool branch_or_jump(const UChar * addr)
    }
 
    if (opcode == 0x11) {
-      /*bc1f & bc1t */
+      /* bc1f & bc1t */
       fmt = get_fmt(cins);
       if (fmt == 0x08) {
          return True;
       }
+
+      /* MSA branches */
+      /* bnz.df, bz.df */
+      if (fmt >= 0x18) {
+         return True;
+      }
+      /* bnz.v */
+      if (fmt == 0x0f) {
+         return True;
+      }
+      /* bz.v */
+      if (fmt == 0x0b) {
+         return True;
+      }
    }
 
    /* bposge32 */
@@ -964,9 +1270,9 @@ static UShort extend_s_10to16(UInt x)
    return (UShort) ((((Int) x) << 22) >> 22);
 }
 
-static ULong extend_s_10to32(UInt x)
+static UInt extend_s_10to32(UInt x)
 {
-   return (ULong)((((Long) x) << 22) >> 22);
+   return (UInt)((((Int) x) << 22) >> 22);
 }
 
 static ULong extend_s_10to64(UInt x)
@@ -1066,6 +1372,12 @@ static IRExpr *getIReg(UInt iregNo)
    }
 }
 
+
+/* Builds a Get expression of type V128 for MSA vector register 'wregNo'
+   (0..31), located via msaGuestRegOffset. */
+static IRExpr *getWReg(UInt wregNo)
+{
+   UInt wregOffset;
+
+   vassert(wregNo <= 31);
+   wregOffset = msaGuestRegOffset(wregNo);
+   return IRExpr_Get(wregOffset, Ity_V128);
+}
+
 static IRExpr *getHI(void)
 {
    if (mode64)
@@ -1106,6 +1418,13 @@ static IRExpr *getLLdata(void)
       return IRExpr_Get(offsetof(VexGuestMIPS32State, guest_LLdata), Ity_I32);
 }
 
+/* Builds a 32-bit Get expression for the guest MSACSR, using the MIPS64
+   guest-state layout when mode64 is set and the MIPS32 layout otherwise. */
+static IRExpr *getMSACSR(void)
+{
+   const Int msacsrOff = mode64
+                         ? offsetof(VexGuestMIPS64State, guest_MSACSR)
+                         : offsetof(VexGuestMIPS32State, guest_MSACSR);
+
+   return IRExpr_Get(msacsrOff, Ity_I32);
+}
+
 /* Get byte from register reg, byte pos from 0 to 3 (or 7 for MIPS64) . */
 static IRExpr *getByteFromReg(UInt reg, UInt byte_pos)
 {
@@ -1144,6 +1463,13 @@ static void putLLdata(IRExpr * e)
       stmt(IRStmt_Put(offsetof(VexGuestMIPS32State, guest_LLdata), e));
 }
 
+/* Emits a Put of expression 'e' to the guest MSACSR, using the MIPS64
+   guest-state layout when mode64 is set and the MIPS32 layout otherwise. */
+static void putMSACSR(IRExpr * e)
+{
+   const Int msacsrOff = mode64
+                         ? offsetof(VexGuestMIPS64State, guest_MSACSR)
+                         : offsetof(VexGuestMIPS32State, guest_MSACSR);
+
+   stmt(IRStmt_Put(msacsrOff, e));
+}
+
 /* fs   - fpu source register number.
    inst - fpu instruction that needs to be executed.
    sz32 - size of source register.
@@ -1231,26 +1557,90 @@ static void calculateFCSR(UInt fs, UInt ft, UInt inst, Bool sz32, UInt opN)
    putFCSR(mkexpr(fcsr));
 }
 
-static IRExpr *getULR(void)
-{
-   if (mode64)
-      return IRExpr_Get(offsetof(VexGuestMIPS64State, guest_ULR), Ity_I64);
-   else
-      return IRExpr_Get(offsetof(VexGuestMIPS32State, guest_ULR), Ity_I32);
-}
+/* Emit a dirty-helper call that computes a new MSACSR value and store it.
+   ws, wt - source MSA register numbers.
+   inst   - MSA fp instruction that needs to be executed.
+   opN    - number of operands: 1 - unary, 2 - binary operation.  For any
+            other value no guest-state effects are declared here. */
+static void calculateMSACSR(UInt ws, UInt wt, UInt inst, UInt opN) {
+   IRDirty *d;
+   IRTemp msacsr = newTemp(Ity_I32);
+   /* IRExpr_GSPTR() => Need to pass pointer to guest state to helper. */
+   d = unsafeIRDirty_1_N(msacsr, 0,
+                         "mips_dirtyhelper_calculate_MSACSR",
+                         &mips_dirtyhelper_calculate_MSACSR,
+                         mkIRExprVec_4(IRExpr_GSPTR(),
+                                       mkU32(ws),
+                                       mkU32(wt),
+                                       mkU32(inst)));
 
-static void putIReg(UInt archreg, IRExpr * e)
-{
-   IRType ty = mode64 ? Ity_I64 : Ity_I32;
-   vassert(archreg < 32);
-   vassert(typeOfIRExpr(irsb->tyenv, e) == ty);
-   if (archreg != 0)
-      stmt(IRStmt_Put(integerGuestRegOffset(archreg), e));
-}
+   if (opN == 1) {  /* Unary operation. */
+      /* Declare we're reading guest state: MSACSR and register ws. */
+      d->nFxState = 2;
+      vex_bzero(&d->fxState, sizeof(d->fxState));
+      d->fxState[0].fx     = Ifx_Read;  /* read */
 
-static IRExpr *mkNarrowTo32(IRType ty, IRExpr * src)
-{
-   vassert(ty == Ity_I32 || ty == Ity_I64);
+      if (mode64)
+         d->fxState[0].offset = offsetof(VexGuestMIPS64State, guest_MSACSR);
+      else
+         d->fxState[0].offset = offsetof(VexGuestMIPS32State, guest_MSACSR);
+
+      d->fxState[0].size   = sizeof(UInt);
+      d->fxState[1].fx     = Ifx_Read;  /* read */
+      d->fxState[1].offset = msaGuestRegOffset(ws);
+      d->fxState[1].size   = sizeof(ULong);  /* TODO confirm: reg is V128 */
+   } else if (opN == 2) {  /* Binary operation. */
+      /* Declare we're reading guest state: MSACSR, registers ws and wt. */
+      d->nFxState = 3;
+      vex_bzero(&d->fxState, sizeof(d->fxState));
+      d->fxState[0].fx     = Ifx_Read;  /* read */
+
+      if (mode64)
+         d->fxState[0].offset = offsetof(VexGuestMIPS64State, guest_MSACSR);
+      else
+         d->fxState[0].offset = offsetof(VexGuestMIPS32State, guest_MSACSR);
+
+      d->fxState[0].size   = sizeof(UInt);
+      d->fxState[1].fx     = Ifx_Read;  /* read */
+      d->fxState[1].offset = msaGuestRegOffset(ws);
+      d->fxState[1].size   = sizeof(ULong);  /* TODO confirm: reg is V128 */
+      d->fxState[2].fx     = Ifx_Read;  /* read */
+      d->fxState[2].offset = msaGuestRegOffset(wt);
+      d->fxState[2].size   = sizeof(ULong);  /* TODO confirm: reg is V128 */
+   }
+
+   stmt(IRStmt_Dirty(d));  /* Emit the helper call. */
+   putMSACSR(mkexpr(msacsr));  /* The helper's result is written to MSACSR. */
+}
+
+static IRExpr *getULR(void)  /* Read guest_ULR (I64 if mode64, else I32). */
+{
+   if (mode64)
+      return IRExpr_Get(offsetof(VexGuestMIPS64State, guest_ULR), Ity_I64);
+   else
+      return IRExpr_Get(offsetof(VexGuestMIPS32State, guest_ULR), Ity_I32);
+}
+
+static void putIReg(UInt archreg, IRExpr * e)  /* Write integer reg archreg. */
+{
+   IRType ty = mode64 ? Ity_I64 : Ity_I32;  /* Required type of e. */
+   vassert(archreg < 32);
+   vassert(typeOfIRExpr(irsb->tyenv, e) == ty);
+   if (archreg != 0)  /* A write to register 0 emits no statement. */
+      stmt(IRStmt_Put(integerGuestRegOffset(archreg), e));
+}
+
+static void putWReg(UInt wregNo, IRExpr * e) {  /* Write MSA reg wregNo. */
+   vassert(wregNo <= 31);
+   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);  /* e must be V128. */
+   stmt(IRStmt_Put(msaGuestRegOffset(wregNo), e));  /* Whole vector. */
+   stmt(IRStmt_Put(floatGuestRegOffset(wregNo),  /* Mirror low 64b to FPR. */
+                   unop(Iop_ReinterpI64asF64, unop(Iop_V128to64, e))));
+}
+
+static IRExpr *mkNarrowTo32(IRType ty, IRExpr * src)
+{  /* Yield a 32-bit value: truncate when ty is I64, else return src as is. */
+   vassert(ty == Ity_I32 || ty == Ity_I64);
    return ty == Ity_I64 ? unop(Iop_64to32, src) : src;
 }
 
@@ -1311,6 +1701,12 @@ static IRExpr *mkNarrowTo8 ( IRType ty, IRExpr * src )
    return ty == Ity_I64 ? unop(Iop_64to8, src) : unop(Iop_32to8, src);
 }
 
+static IRExpr *mkNarrowTo16 ( IRType ty, IRExpr * src )
+{  /* Narrow src to 16 bits, using the 64- or 32-bit unop selected by ty. */
+   vassert(ty == Ity_I32 || ty == Ity_I64);
+   return ty == Ity_I64 ? unop(Iop_64to16, src) : unop(Iop_32to16, src);
+}
+
 static void putPC(IRExpr * e)
 {
    stmt(IRStmt_Put(OFFB_PC, e));
@@ -1492,6 +1888,11 @@ static void putFReg(UInt dregNo, IRExpr * e)
    IRType ty = fp_mode64 ? Ity_F64 : Ity_F32;
    vassert(typeOfIRExpr(irsb->tyenv, e) == ty);
    stmt(IRStmt_Put(floatGuestRegOffset(dregNo), e));
+   if (has_msa && fp_mode64) {
+      stmt(IRStmt_Put(msaGuestRegOffset(dregNo),
+           binop(Iop_64HLtoV128,
+                    mkU64(0), unop(Iop_ReinterpF64asI64, e))));
+   }
 }
 
 static void putDReg(UInt dregNo, IRExpr * e)
@@ -1501,6 +1902,10 @@ static void putDReg(UInt dregNo, IRExpr * e)
       IRType ty = Ity_F64;
       vassert(typeOfIRExpr(irsb->tyenv, e) == ty);
       stmt(IRStmt_Put(floatGuestRegOffset(dregNo), e));
+      if (has_msa)
+         stmt(IRStmt_Put(msaGuestRegOffset(dregNo),
+              binop(Iop_64HLtoV128,
+                    mkU64(0), unop(Iop_ReinterpF64asI64, e))));
    } else {
       vassert(dregNo < 32);
       vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
@@ -1555,6 +1960,30 @@ static IRExpr* get_IR_roundingmode ( void )
                 binop(Iop_Shl32, mkexpr(rm_MIPS), mkU8(1)), mkU32(2)));
 }
 
+static IRExpr* get_IR_roundingmode_MSA ( void ) {
+   /* Map the MSACSR rounding-mode field to the IR rounding-mode encoding:
+      rounding mode | MIPS | IR
+      ------------------------
+      to nearest    | 00  | 00
+      to zero       | 01  | 11
+      to +infinity  | 10  | 10
+      to -infinity  | 11  | 01
+   */
+   IRTemp rm_MIPS = newTemp(Ity_I32);
+   /* The two least significant bits of MSACSR hold the rounding mode. */
+
+   if (mode64)
+      assign(rm_MIPS, binop(Iop_And32, IRExpr_Get(offsetof(VexGuestMIPS64State,
+                            guest_MSACSR), Ity_I32), mkU32(3)));
+   else
+      assign(rm_MIPS, binop(Iop_And32, IRExpr_Get(offsetof(VexGuestMIPS32State,
+                            guest_MSACSR), Ity_I32), mkU32(3)));
+
+   /* rm_IR = rm_MIPS ^ ((rm_MIPS << 1) & 2): bit 0 set flips bit 1. */
+   return binop(Iop_Xor32, mkexpr(rm_MIPS), binop(Iop_And32,
+                binop(Iop_Shl32, mkexpr(rm_MIPS), mkU8(1)), mkU32(2)));
+}
+
 /* sz, ULong -> IRExpr */
 static IRExpr *mkSzImm ( IRType ty, ULong imm64 )
 {
@@ -11972,121 +12401,13378 @@ static UInt disDSPInstr_MIPS_WRK ( UInt cins )
 
                      assign(t1, binop(Iop_Shl32, getIReg(rt), mkU8(rd)));
 
-                     if (31 == rd) {
-                        putIReg(rt, binop(Iop_Or32,
-                                          mkexpr(t1),
-                                          binop(Iop_And32,
-                                                getIReg(rs),
-                                                mkU32(0x7fffffff))));
-                     } else if (1 == rd) {
-                        putIReg(rt,
-                                binop(Iop_Or32,
-                                      mkexpr(t1),
-                                      binop(Iop_And32,
-                                            getIReg(rs), mkU32(0x1))));
-                     } else {
-                        assign(t2,
-                               unop(Iop_Not32,
-                                    binop(Iop_Shl32,
-                                          mkU32(0xffffffff), mkU8(rd))));
+                     if (31 == rd) {
+                        putIReg(rt, binop(Iop_Or32,
+                                          mkexpr(t1),
+                                          binop(Iop_And32,
+                                                getIReg(rs),
+                                                mkU32(0x7fffffff))));
+                     } else if (1 == rd) {
+                        putIReg(rt,
+                                binop(Iop_Or32,
+                                      mkexpr(t1),
+                                      binop(Iop_And32,
+                                            getIReg(rs), mkU32(0x1))));
+                     } else {
+                        assign(t2,
+                               unop(Iop_Not32,
+                                    binop(Iop_Shl32,
+                                          mkU32(0xffffffff), mkU8(rd))));
+
+                        putIReg(rt, binop(Iop_Or32,
+                                          mkexpr(t1),
+                                          binop(Iop_And32,
+                                                getIReg(rs), mkexpr(t2))));
+                     }
+                     break;
+                  }
+                  case 0x1: {  /* PREPEND */
+                     DIP("prepend r%u, r%u, %u", rt, rs, rd);
+                     vassert(!mode64);
+                     t1 = newTemp(Ity_I32);
+                     t2 = newTemp(Ity_I32);
+                     t3 = newTemp(Ity_I32);
+
+                     if (0 != rd) {
+                        assign(t1, binop(Iop_Shr32, getIReg(rt), mkU8(rd)));
+
+                        if (31 == rd) {
+                           putIReg(rt, binop(Iop_Or32,
+                                             mkexpr(t1),
+                                             binop(Iop_Shl32,
+                                                   binop(Iop_And32,
+                                                         getIReg(rs),
+                                                         mkU32(0x7fffffff)),
+                                                   mkU8(1))));
+                        } else if (1 == rd) {
+                           putIReg(rt, binop(Iop_Or32,
+                                             mkexpr(t1),
+                                             binop(Iop_Shl32,
+                                                   binop(Iop_And32,
+                                                         getIReg(rs),
+                                                         mkU32(0x1)),
+                                                   mkU8(31))));
+                        } else {
+                           assign(t2, binop(Iop_Add32, mkU32(rd), mkU32(0x1)));
+
+                           assign(t3, unop(Iop_Not32,
+                                           binop(Iop_Shl32,
+                                                 mkU32(0xffffffff),
+                                                 unop(Iop_32to8, mkexpr(t2)))));
+
+                           putIReg(rt, binop(Iop_Or32,
+                                             mkexpr(t1),
+                                             binop(Iop_Shl32,
+                                                   binop(Iop_And32,
+                                                         getIReg(rs),
+                                                         mkexpr(t3)),
+                                                   mkU8(32-rd))));
+                        }
+                     }
+                     break;
+                  }
+                  case 0x10: {  /* BALIGN */
+                     DIP("balign r%u, r%u, %u", rt, rs, rd);
+                     vassert(!mode64);
+                     t1 = newTemp(Ity_I32);
+                     t2 = newTemp(Ity_I32);
+                     t3 = newTemp(Ity_I32);
+
+                     if ((2 != rd) && (0 != rd)) {
+                        assign(t1, binop(Iop_Shl32,
+                                         binop(Iop_And32,
+                                               mkU32(rd), mkU32(0x3)),
+                                         mkU8(0x3)));
+                        assign(t2, binop(Iop_Shl32,
+                                         getIReg(rt),
+                                         unop(Iop_32to8, mkexpr(t1))));
+                        assign(t3, binop(Iop_Shr32,
+                                         getIReg(rs),
+                                         unop(Iop_32to8,
+                                              binop(Iop_Shl32,
+                                                    binop(Iop_Sub32,
+                                                          mkU32(0x4),
+                                                          binop(Iop_And32,
+                                                                mkU32(rd),
+                                                                mkU32(0x3))),
+                                                    mkU8(0x3)))));
+                        putIReg(rt, binop(Iop_Or32, mkexpr(t2), mkexpr(t3)));
+                     }
+                     break;
+                  }
+                  default:
+                     return -1;
+               }
+               break;  /* end of APPEND */
+            }
+            default:
+               return -1;
+         }
+         break;
+      }
+      default:
+            return -1;
+   }
+   return 0;
+}
+
+static Int msa_I8_logical(UInt cins, UChar wd, UChar ws) {  /* I8 logic ops */
+   IRTemp t1, t2;
+   UShort operation;
+   UChar i8;
+
+   operation = (cins >> 24) & 3;  /* cins bits 25..24 select the operation. */
+   i8 = (cins & 0x00FF0000) >> 16;  /* cins bits 23..16: 8-bit immediate. */
+   switch (operation) {
+      case 0x00: {  /* ANDI.B: wd = ws & splat(i8) */
+            DIP("ANDI.B w%d, w%d, %d", wd, ws, i8);
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            ULong tmp = i8;  /* splat: i8 copied into each of 8 bytes */
+            tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) |
+                   (tmp << 32) | (tmp << 24) | (tmp << 16) |
+                   (tmp << 8);
+            assign(t1, getWReg(ws));
+            assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+            putWReg(wd, binop(Iop_AndV128, mkexpr(t1), mkexpr(t2)));
+            break;
+         }
+
+      case 0x01: { /* ORI.B: wd = ws | splat(i8) */
+            DIP("ORI.B w%d, w%d, %d", wd, ws, i8);
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            ULong tmp = i8;  /* splat: i8 copied into each of 8 bytes */
+            tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) |
+                   (tmp << 32) | (tmp << 24) | (tmp << 16) |
+                   (tmp << 8);
+            assign(t1, getWReg(ws));
+            assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+            putWReg(wd, binop(Iop_OrV128, mkexpr(t1), mkexpr(t2)));
+            break;
+         }
+
+      case 0x02: { /* NORI.B: wd = ~(ws | splat(i8)) */
+            DIP("NORI.B w%d, w%d, %d", wd, ws, i8);
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            ULong tmp = i8;  /* splat: i8 copied into each of 8 bytes */
+            tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) |
+                   (tmp << 32) | (tmp << 24) | (tmp << 16) |
+                   (tmp << 8);
+            assign(t1, getWReg(ws));
+            assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+            putWReg(wd, unop(Iop_NotV128, binop(Iop_OrV128,
+                                                mkexpr(t1), mkexpr(t2))));
+            break;
+         }
+
+      case 0x03: {  /* XORI.B: wd = ws ^ splat(i8) */
+            DIP("XORI.B w%d, w%d, %d", wd, ws, i8);
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            ULong tmp = i8;  /* splat: i8 copied into each of 8 bytes */
+            tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) |
+                   (tmp << 32) | (tmp << 24) | (tmp << 16) |
+                   (tmp << 8);
+            assign(t1, getWReg(ws));
+            assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+            putWReg(wd, binop(Iop_XorV128, mkexpr(t1), mkexpr(t2)));
+            break;
+         }
+
+      default:
+         return -1;  /* Not reachable: operation is masked to 0..3. */
+   }
+
+   return 0;  /* IR was emitted for the decoded operation. */
+}
+
+static Int msa_I8_branch(UInt cins, UChar wd, UChar ws) {
+   IRTemp t1, t2, t3, t4;
+   UShort operation;
+   UChar i8;
+
+   operation = (cins >> 24) & 3;  /* cins bits 25..24 select the operation. */
+   i8 = (cins & 0x00FF0000) >> 16;  /* cins bits 23..16: 8-bit immediate. */
+   switch (operation) {  /* Bit move/select on wd, ws and splat(i8). */
+      case 0x00: { /* BMNZI.B: wd = (ws & i8) | (wd & ~i8) */
+            DIP("BMNZI.B w%d, w%d, %d", wd, ws, i8);
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            t3 = newTemp(Ity_V128);
+            t4 = newTemp(Ity_V128);
+            ULong tmp = i8;  /* splat: i8 copied into each of 8 bytes */
+            tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) |
+                   (tmp << 32) | (tmp << 24) | (tmp << 16) |
+                   (tmp << 8);
+            assign(t4, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+            assign(t1, binop(Iop_AndV128, getWReg(ws), mkexpr(t4)));
+            assign(t2, binop(Iop_AndV128, getWReg(wd),
+                                          unop(Iop_NotV128, mkexpr(t4))));
+            assign(t3, binop(Iop_OrV128, mkexpr(t1), mkexpr(t2)));
+            putWReg(wd, mkexpr(t3));
+            break;
+         }
+
+      case 0x01: { /* BMZI.B: wd = (wd & i8) | (ws & ~i8) */
+            DIP("BMZI.B w%d, w%d, %d", wd, ws, i8);
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            t3 = newTemp(Ity_V128);
+            t4 = newTemp(Ity_V128);
+            ULong tmp = i8;  /* splat: i8 copied into each of 8 bytes */
+            tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) |
+                   (tmp << 32) | (tmp << 24) | (tmp << 16) |
+                   (tmp << 8);
+            assign(t4, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+            assign(t1, binop(Iop_AndV128, getWReg(wd), mkexpr(t4)));
+            assign(t2, binop(Iop_AndV128, getWReg(ws),
+                             unop(Iop_NotV128, mkexpr(t4))));
+            assign(t3, binop(Iop_OrV128, mkexpr(t1), mkexpr(t2)));
+            putWReg(wd, mkexpr(t3));
+            break;
+         }
+
+      case 0x02: { /* BSELI.B: wd = (wd & i8) | (ws & ~wd) */
+            DIP("BSELI.B w%d, w%d, %d", wd, ws, i8);
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            t3 = newTemp(Ity_V128);
+            t4 = newTemp(Ity_V128);
+            ULong tmp = i8;  /* splat: i8 copied into each of 8 bytes */
+            tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) |
+                   (tmp << 32) | (tmp << 24) | (tmp << 16) |
+                   (tmp << 8);
+            assign(t4, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+            assign(t1, binop(Iop_AndV128, getWReg(wd), mkexpr(t4)));
+            assign(t2, binop(Iop_AndV128, getWReg(ws),
+                             unop(Iop_NotV128, getWReg(wd))));
+            assign(t3, binop(Iop_OrV128, mkexpr(t1), mkexpr(t2)));
+            putWReg(wd, mkexpr(t3));
+            break;
+         }
+
+      default:
+         return -1;  /* operation 3 is not handled by this function. */
+   }
+
+   return 0;  /* IR was emitted for the decoded operation. */
+}
+
+static Int msa_I8_shift(UInt cins, UChar wd, UChar ws) {  /* SHF.df shuffles */
+   IRTemp t1, t2;
+   UShort operation;
+   UChar i8;
+
+   operation = (cins >> 24) & 3;  /* cins bits 25..24 select the operation. */
+   i8 = (cins & 0x00FF0000) >> 16;  /* Four 2-bit element selectors. */
+   switch (operation) {
+      case 0x00: { /* SHF.B: shuffle bytes within each group of four */
+            DIP("SHF.B w%d, w%d, %d", wd, ws, i8);
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            assign(t1, getWReg(wd));  /* NOTE(review): t1 is never used. */
+            assign(t2, getWReg(ws));
+            Int i;
+            IRTemp tmp[16];
+
+            for (i = 0; i < 16; i++) {
+               tmp[i] = newTemp(Ity_I8);
+               assign(tmp[i],  /* = ws[4*(i/4) + 2-bit field (i%4) of i8] */
+                      binop(Iop_GetElem8x16, mkexpr(t2),
+                            mkU8(i - (i % 4) +
+                                 ((i8 >> (i % 4) * 2) & 0x03))));
+            }
+
+            putWReg(wd, binop(Iop_64HLtoV128,  /* tmp[0] is the lowest lane. */
+                              binop(Iop_32HLto64,
+                                    binop(Iop_16HLto32,
+                                          binop(Iop_8HLto16,
+                                                mkexpr(tmp[15]),
+                                                mkexpr(tmp[14])),
+                                          binop(Iop_8HLto16,
+                                                mkexpr(tmp[13]),
+                                                mkexpr(tmp[12]))),
+                                    binop(Iop_16HLto32,
+                                          binop(Iop_8HLto16,
+                                                mkexpr(tmp[11]),
+                                                mkexpr(tmp[10])),
+                                          binop(Iop_8HLto16,
+                                                mkexpr(tmp[9]),
+                                                mkexpr(tmp[8])))),
+                              binop(Iop_32HLto64,
+                                    binop(Iop_16HLto32,
+                                          binop(Iop_8HLto16,
+                                                mkexpr(tmp[7]),
+                                                mkexpr(tmp[6])),
+                                          binop(Iop_8HLto16,
+                                                mkexpr(tmp[5]),
+                                                mkexpr(tmp[4]))),
+                                    binop(Iop_16HLto32,
+                                          binop(Iop_8HLto16,
+                                                mkexpr(tmp[3]),
+                                                mkexpr(tmp[2])),
+                                          binop(Iop_8HLto16,
+                                                mkexpr(tmp[1]),
+                                                mkexpr(tmp[0]))))));
+            break;
+         }
+
+      case 0x01: { /* SHF.H: shuffle halfwords within each group of four */
+            DIP("SHF.H w%d, w%d, %d", wd, ws, i8);
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            assign(t1, getWReg(wd));  /* NOTE(review): t1 is never used. */
+            assign(t2, getWReg(ws));
+            Int i;
+            IRTemp tmp[8];
+
+            for (i = 0; i < 8; i++) {
+               tmp[i] = newTemp(Ity_I16);
+               assign(tmp[i],  /* = ws[4*(i/4) + 2-bit field (i%4) of i8] */
+                      binop(Iop_GetElem16x8, mkexpr(t2),
+                            mkU8(i - (i % 4) +
+                                 ((i8 >> (i % 4) * 2) & 0x03))));
+            }
+
+            putWReg(wd, binop(Iop_64HLtoV128,  /* tmp[0] is the lowest lane. */
+                              binop(Iop_32HLto64,
+                                    binop(Iop_16HLto32,
+                                          mkexpr(tmp[7]), mkexpr(tmp[6])),
+                                    binop(Iop_16HLto32,
+                                          mkexpr(tmp[5]), mkexpr(tmp[4]))),
+                              binop(Iop_32HLto64,
+                                    binop(Iop_16HLto32,
+                                          mkexpr(tmp[3]), mkexpr(tmp[2])),
+                                    binop(Iop_16HLto32,
+                                          mkexpr(tmp[1]), mkexpr(tmp[0])))));
+            break;
+         }
+
+      case 0x02: { /* SHF.W: shuffle the four words */
+            DIP("SHF.W w%d, w%d, %d", wd, ws, i8);
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            assign(t1, getWReg(wd));  /* NOTE(review): t1 is never used. */
+            assign(t2, getWReg(ws));
+            Int i;
+            IRTemp tmp[4];
+
+            for (i = 0; i < 4; i++) {
+               tmp[i] = newTemp(Ity_I32);
+               assign(tmp[i],  /* = ws[2-bit field i of i8]; i - i%4 is 0 */
+                      binop(Iop_GetElem32x4, mkexpr(t2),
+                            mkU8(i - (i % 4) +
+                                 ((i8 >> (i % 4) * 2) & 0x03))));
+            }
+
+            putWReg(wd, binop(Iop_64HLtoV128,  /* tmp[0] is the lowest lane. */
+                              binop(Iop_32HLto64,
+                                    mkexpr(tmp[3]), mkexpr(tmp[2])),
+                              binop(Iop_32HLto64,
+                                    mkexpr(tmp[1]), mkexpr(tmp[0]))));
+            break;
+         }
+
+      default:
+         return -1;  /* operation 3 is not handled by this function. */
+   }
+
+   return 0;  /* IR was emitted for the decoded operation. */
+}
+
+static Int msa_I5_06(UInt cins, UChar wd, UChar ws) { /* I5 (0x06) */
+   IRTemp t1, t2, t3;
+   UShort operation;
+   UChar df, wt;
+
+   operation = (cins & 0x03800000) >> 23;
+   df = (cins & 0x00600000) >> 21;
+   wt = (cins & 0x001F0000) >> 16;
+
+   switch (operation) {
+      case 0x00: { /* ADDVI */
+            ULong tmp = wt;
+
+            switch (df) {
+               case 0x00: { /* ADDVI.B */
+                     DIP("ADDVI.B w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) |
+                            (tmp << 32) | (tmp << 24) | (tmp << 16) |
+                            (tmp << 8);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Add8x16, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* ADDVI.H */
+                     DIP("ADDVI.H w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Add16x8, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* ADDVI.W */
+                     DIP("ADDVI.W w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     tmp |= (tmp << 32);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Add32x4, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* ADDVI.D */
+                     DIP("ADDVI.D w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Add64x2, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+            }
+
+            break;
+         }
+
+      case 0x01: { /* SUBVI */
+            ULong tmp = wt;
+
+            switch (df) {
+               case 0x00: { /* SUBVI.B */
+                     DIP("SUBVI.B w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) |
+                            (tmp << 32) | (tmp << 24) | (tmp << 16) |
+                            (tmp << 8);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Sub8x16, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* SUBVI.H */
+                     DIP("SUBVI.H w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Sub16x8, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* SUBVI.W */
+                     DIP("SUBVI.W w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     tmp |= (tmp << 32);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Sub32x4, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* SUBVI.D */
+                     DIP("SUBVI.D w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Sub64x2, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+            }
+
+            break;
+         }
+
+      case 0x02: { /* MAXI_S */
+            ULong tmp = wt;
+
+            switch (df) {
+               case 0x00: { /* MAXI_S.B */
+                     DIP("MAXI_S.B w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     char stemp = ((int)tmp << 27) >> 27;
+                     tmp = (UChar)stemp;
+                     tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) |
+                            (tmp << 32) | (tmp << 24) | (tmp << 16) |
+                            (tmp << 8);
+                     assign(t1, getWReg(ws));
+                     assign(t2,binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Max8Sx16, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* MAXI_S.H */
+                     DIP("MAXI_S.H w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     short stemp = ((int)tmp << 27) >> 27;
+                     tmp = (UShort)stemp;
+                     tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Max16Sx8, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* MAXI_S.W */
+                     DIP("MAXI_S.W w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     int stemp = ((int)tmp << 27) >> 27;
+                     tmp = (UInt)stemp;
+                     tmp |= (tmp << 32);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Max32Sx4, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* MAXI_S.D */
+                     DIP("MAXI_S.D w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     Long stemp = ((Long)tmp << 59) >> 59;
+                     tmp = stemp;
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Max64Sx2, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+            }
+
+            break;
+         }
+
+      case 0x03: { /* MAXI_U */
+            ULong tmp = wt;
+
+            switch (df) {
+               case 0x00: { /* MAXI_U.B */
+                     DIP("MAXI_U.B w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) |
+                            (tmp << 32) | (tmp << 24) | (tmp << 16) |
+                            (tmp << 8);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Max8Ux16, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* MAXI_U.H */
+                     DIP("MAXI_U.H w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Max16Ux8, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* MAXI_U.W */
+                     DIP("MAXI_U.W w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     tmp |= (tmp << 32);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Max32Ux4, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* MAXI_U.D */
+                     DIP("MAXI_U.D w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Max64Ux2, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+            }
+
+            break;
+         }
+
+      case 0x04: { /* MINI_S */
+            ULong tmp = wt;
+
+            switch (df) {
+               case 0x00: { /* MINI_S.B */
+                     DIP("MINI_S.B w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     char stemp = ((int)tmp << 27) >> 27;
+                     tmp = (UChar)stemp;
+                     tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) |
+                            (tmp << 32) | (tmp << 24) | (tmp << 16) |
+                            (tmp << 8);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Min8Sx16, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* MINI_S.H */
+                     DIP("MINI_S.H w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     short stemp = ((int)tmp << 27) >> 27;
+                     tmp = (UShort)stemp;
+                     tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Min16Sx8, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* MINI_S.W */
+                     DIP("MINI_S.W w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     int stemp = ((int)tmp << 27) >> 27;
+                     tmp = (UInt)stemp;
+                     tmp |= (tmp << 32);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Min32Sx4, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* MINI_S.D */
+                     DIP("MINI_S.D w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     Long stemp = ((Long)tmp << 59) >> 59;
+                     tmp = stemp;
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Min64Sx2, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+            }
+
+            break;
+         }
+
+      case 0x05: { /* MINI_U */
+            ULong tmp = wt;
+
+            switch (df) {
+               case 0x00: { /* MINI_U.B */
+                     DIP("MINI_U.B w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) |
+                            (tmp << 32) | (tmp << 24) | (tmp << 16) |
+                            (tmp << 8);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Min8Ux16, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* MINI_U.H */
+                     DIP("MINI_U.H w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Min16Ux8, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* MINI_U.W */
+                     DIP("MINI_U.W w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     tmp |= (tmp << 32);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Min32Ux4, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* MINI_U.D */
+                     DIP("MINI_U.D w%d, w%d, %d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+                     assign(t3, binop(Iop_Min64Ux2, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+            }
+
+            break;
+         }
+
+      default: {
+            return -1;
+         }
+   }
+
+   return 0;
+}
+
+/* Sign-extend the low 5 bits of i5 to a 64-bit value.
+
+   The computation stays in unsigned arithmetic: xor-ing with the sign bit
+   (0x10) and then subtracting it maps 0x10..0x1F onto -16..-1 modulo 2^64.
+   This avoids left-shifting a signed int into its sign bit, which is
+   undefined behaviour in ISO C. */
+static ULong msa_I5_07_sext5(UChar i5) {
+   ULong v = i5 & 0x1F;
+
+   return (v ^ 0x10ULL) - 0x10ULL;
+}
+
+/* Replicate the low lane of 'value' across a 64-bit word.
+
+   df selects the lane width: 0 = 8 bits, 1 = 16 bits, 2 = 32 bits; any
+   other df returns 'value' unchanged (one 64-bit lane).  Multiplying the
+   masked lane by a constant with a 1 in the lowest bit of every lane is
+   the same as OR-ing together the shifted copies, since the copies never
+   overlap. */
+static ULong msa_I5_07_splat(ULong value, UChar df) {
+   switch (df) {
+      case 0x00:
+         return (value & 0xFFULL) * 0x0101010101010101ULL;
+
+      case 0x01:
+         return (value & 0xFFFFULL) * 0x0001000100010001ULL;
+
+      case 0x02:
+         return (value & 0xFFFFFFFFULL) * 0x0000000100000001ULL;
+
+      default:
+         return value;
+   }
+}
+
+/* Translate the instructions decoded under I5 (0x07) / I10:
+   CEQI, CLTI_S, CLTI_U, CLEI_S, CLEI_U and LDI.
+
+   cins is the instruction word; the operation is taken from bits 25..23,
+   the data format (df) from bits 22..21, the 5-bit immediate from bits
+   20..16 and, for LDI, the 10-bit immediate from bits 20..11.  wd and ws
+   are the destination and source vector register numbers.
+
+   Returns 0 when IR has been emitted for the instruction and -1 when the
+   operation field does not match any handled instruction. */
+static Int msa_I5_07(UInt cins, UChar wd, UChar ws) { /* I5 (0x07) / I10 */
+   /* Lane-wise comparison ops, indexed by df (B, H, W, D). */
+   static const IROp cmpEQ[4] = {
+      Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2
+   };
+   static const IROp cmpGTS[4] = {
+      Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2
+   };
+   static const IROp cmpGTU[4] = {
+      Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2
+   };
+   static const char *const dfSuffix[4] = { "B", "H", "W", "D" };
+
+   IRTemp t1, t2, t3;
+   UShort operation;
+   UChar df, i5;
+   const char *mnemonic;
+   Int immIsSigned = 1;  /* cleared for the _U variants */
+   IROp opGT;            /* greater-than op; not used by CEQI */
+   ULong imm;
+
+   operation = (cins & 0x03800000) >> 23;
+   df = (cins & 0x00600000) >> 21;   /* two bits, so always 0..3 */
+   i5 = (cins & 0x001F0000) >> 16;
+   opGT = cmpGTS[df];
+
+   switch (operation) {
+      case 0x00: /* CEQI.df */
+         mnemonic = "CEQI";
+         break;
+
+      case 0x02: /* CLTI_S.df */
+         mnemonic = "CLTI_S";
+         break;
+
+      case 0x03: /* CLTI_U.df */
+         mnemonic = "CLTI_U";
+         immIsSigned = 0;
+         opGT = cmpGTU[df];
+         break;
+
+      case 0x04: /* CLEI_S.df */
+         mnemonic = "CLEI_S";
+         break;
+
+      case 0x05: /* CLEI_U.df */
+         mnemonic = "CLEI_U";
+         immIsSigned = 0;
+         opGT = cmpGTU[df];
+         break;
+
+      case 0x06: { /* LDI.df */
+            ULong tmp;
+            UShort s10;
+            s10 = (cins & 0x001FF800) >> 11;
+
+            switch (df) {
+               case 0x00: /* LDI.B */
+                  DIP("LDI.B w%d, %d", wd, s10);
+                  tmp = s10 & 0xFFl;
+                  tmp = tmp | (tmp << 8) | (tmp << 16) | (tmp << 24)
+                        | (tmp << 32) | (tmp << 40) | (tmp << 48) |
+                        (tmp << 56);
+                  break;
+
+               case 0x01: /* LDI.H */
+                  DIP("LDI.H w%d, %d", wd, s10);
+                  tmp = extend_s_10to16(s10);
+                  tmp = tmp | (tmp << 16) | (tmp << 32) | (tmp << 48);
+                  break;
+
+               case 0x02: /* LDI.W */
+                  DIP("LDI.W w%d, %d", wd, s10);
+                  tmp = extend_s_10to32(s10);
+                  tmp = tmp | (tmp << 32);
+                  break;
+
+               case 0x03: /* LDI.D */
+                  DIP("LDI.D w%d, %d", wd, s10);
+                  tmp = extend_s_10to64(s10);
+                  break;
+
+               default:
+                  return -1;
+            }
+
+            /* The same 64-bit pattern fills both halves of wd. */
+            putWReg(wd, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp)));
+            return 0;
+         }
+
+      default:
+         return -1;
+   }
+
+   /* Common path for the five compare-immediate instructions. */
+   DIP("%s.%s w%d, w%d, %d", mnemonic, dfSuffix[df], wd, ws, i5);
+
+   /* Build the 64-bit pattern holding the immediate in every lane. */
+   imm = immIsSigned ? msa_I5_07_sext5(i5) : (ULong)i5;
+   imm = msa_I5_07_splat(imm, df);
+
+   t1 = newTemp(Ity_V128);
+   t2 = newTemp(Ity_V128);
+   t3 = newTemp(Ity_V128);
+   assign(t1, getWReg(ws));
+   assign(t2, binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm)));
+
+   switch (operation) {
+      case 0x00: /* CEQI: ws == imm */
+         assign(t3, binop(cmpEQ[df], mkexpr(t1), mkexpr(t2)));
+         break;
+
+      case 0x02: /* CLTI_S */
+      case 0x03: /* CLTI_U */
+         /* ws < imm is expressed as imm > ws, hence the swapped operands. */
+         assign(t3, binop(opGT, mkexpr(t2), mkexpr(t1)));
+         break;
+
+      default: /* CLEI_S, CLEI_U: (imm > ws) | (ws == imm) */
+         assign(t3, binop(Iop_OrV128,
+                          binop(opGT, mkexpr(t2), mkexpr(t1)),
+                          binop(cmpEQ[df], mkexpr(t1), mkexpr(t2))));
+         break;
+   }
+
+   putWReg(wd, mkexpr(t3));
+   return 0;
+}
+
+static Int msa_BIT_09(UInt cins, UChar wd, UChar ws) { /* BIT (0x09) */
+   IRTemp t1, t2, t3;
+   UShort operation;
+   UChar df, m;
+
+   operation = (cins & 0x03800000) >> 23;
+   df = (cins & 0x007F0000) >> 16;
+
+   if ((df & 0x70) == 0x70) {        // 111mmmm; b
+      m = df & 0x07;
+      df = 0;
+   } else if ((df & 0x60) == 0x60) { // 110mmmm; h
+      m = df & 0x0F;
+      df = 1;
+   } else if ((df & 0x40) == 0x40) { // 10mmmmm; w
+      m = df & 0x1F;
+      df = 2;
+   } else if ((df & 0x00) == 0x00) { // 0mmmmmm; d
+      m = df & 0x3F;
+      df = 3;
+   }
+
+   switch (operation) {
+      case 0x00: { /* SLLI.df */
+            switch (df) {
+               case 0x00: { /* SLLI.B */
+                     DIP("SLLI.B w%d, w%d, %d", wd, ws, m);
+                     putWReg(wd, binop(Iop_ShlN8x16, getWReg(ws), mkU8(m)));
+                     break;
+                  }
+
+               case 0x01: { /* SLLI.H */
+                     DIP("SLLI.H w%d, w%d, %d", wd, ws, m);
+                     putWReg(wd, binop(Iop_ShlN16x8, getWReg(ws), mkU8(m)));
+                     break;
+                  }
+
+               case 0x02: { /* SLLI.W */
+                     DIP("SLLI.W w%d, w%d, %d", wd, ws, m);
+                     putWReg(wd, binop(Iop_ShlN32x4, getWReg(ws), mkU8(m)));
+                     break;
+                  }
+
+               case 0x03: { /* SLLI.D */
+                     DIP("SLLI.D w%d, w%d, %d", wd, ws, m);
+                     putWReg(wd, binop(Iop_ShlN64x2, getWReg(ws), mkU8(m)));
+                     break;
+                  }
+            }
+
+            break;
+         }
+
+      case 0x01: { /* SRAI.df */
+            switch (df) {
+               case 0x00: { /* SRAI.B */
+                     DIP("SRAI.B w%d, w%d, %d", wd, ws, m);
+                     putWReg(wd, binop(Iop_SarN8x16, getWReg(ws), mkU8(m)));
+                     break;
+                  }
+
+               case 0x01: { /* SRAI.H */
+                     DIP("SRAI.H w%d, w%d, %d", wd, ws, m);
+                     putWReg(wd, binop(Iop_SarN16x8, getWReg(ws), mkU8(m)));
+                     break;
+                  }
+
+               case 0x02: { /* SRAI.W */
+                     DIP("SRAI.W w%d, w%d, %d", wd, ws, m);
+                     putWReg(wd, binop(Iop_SarN32x4, getWReg(ws), mkU8(m)));
+                     break;
+                  }
+
+               case 0x03: { /* SRAI.D */
+                     DIP("SRAI.D w%d, w%d, %d", wd, ws, m);
+                     putWReg(wd, binop(Iop_SarN64x2, getWReg(ws), mkU8(m)));
+                     break;
+                  }
+            }
+
+            break;
+         }
+
+      case 0x02: { /* SRLI.df */
+            switch (df) {
+               case 0x00: { /* SRLI.B */
+                     DIP("SRLI.B w%d, w%d, %d", wd, ws, m);
+                     putWReg(wd, binop(Iop_ShrN8x16, getWReg(ws), mkU8(m)));
+                     break;
+                  }
+
+               case 0x01: { /* SRLI.H */
+                     DIP("SRLI.H w%d, w%d, %d", wd, ws, m);
+                     putWReg(wd, binop(Iop_ShrN16x8, getWReg(ws), mkU8(m)));
+                     break;
+                  }
+
+               case 0x02: { /* SRLI.W */
+                     DIP("SRLI.W w%d, w%d, %d", wd, ws, m);
+                     putWReg(wd, binop(Iop_ShrN32x4, getWReg(ws), mkU8(m)));
+                     break;
+                  }
+
+               case 0x03: { /* SRLI.D */
+                     DIP("SRLI.D w%d, w%d, %d", wd, ws, m);
+                     putWReg(wd, binop(Iop_ShrN64x2, getWReg(ws), mkU8(m)));
+                     break;
+                  }
+            }
+
+            break;
+         }
+
+      case 0x03: { /* BCLRI.df */
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            t3 = newTemp(Ity_V128);
+            ULong tmp = 1;
+            assign(t1, getWReg(ws));
+
+            switch (df) {
+               case 0x00: { /* BCLRI.B */
+                     DIP("BCLRI.B w%d, w%d, %d", wd, ws, m);
+                     tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) |
+                            (tmp << 32) | (tmp << 24) | (tmp << 16) |
+                            (tmp << 8);
+                     assign(t2, binop(Iop_ShlN8x16,
+                                      binop(Iop_64HLtoV128,
+                                            mkU64(tmp), mkU64(tmp)),mkU8(m)));
+                     break;
+                  }
+
+               case 0x01: { /* BCLRI.H */
+                     DIP("BCLRI.H w%d, w%d, %d", wd, ws, m);
+                     tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16);
+                     assign(t2, binop(Iop_ShlN16x8,
+                                      binop(Iop_64HLtoV128,
+                                            mkU64(tmp), mkU64(tmp)), mkU8(m)));
+                     break;
+                  }
+
+               case 0x02: { /* BCLRI.W */
+                     DIP("BCLRI.W w%d, w%d, %d", wd, ws, m);
+                     tmp |= (tmp << 32);
+                     assign(t2, binop(Iop_ShlN32x4,
+                                      binop(Iop_64HLtoV128,
+                                            mkU64(tmp), mkU64(tmp)), mkU8(m)));
+                     break;
+                  }
+
+               case 0x03: { /* BCLRI.D */
+                     DIP("BCLRI.D w%d, w%d, %d", wd, ws, m);
+                     assign(t2, binop(Iop_ShlN64x2,
+                                      binop(Iop_64HLtoV128,
+                                            mkU64(tmp), mkU64(tmp)), mkU8(m)));
+                     break;
+                  }
+            }
+
+            assign(t3, binop(Iop_AndV128,
+                             mkexpr(t1), unop(Iop_NotV128, mkexpr(t2))));
+            putWReg(wd, mkexpr(t3));
+            break;
+         }
+
+      case 0x04: { /* BSETI */
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            t3 = newTemp(Ity_V128);
+            ULong tmp = 1;
+            assign(t1, getWReg(ws));
+
+            switch (df) {
+               case 0x00: { /* BSETI.B */
+                     DIP("BSETI.B w%d, w%d, %d", wd, ws, m);
+                     tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) |
+                            (tmp << 32) | (tmp << 24) | (tmp << 16) |
+                            (tmp << 8);
+                     assign(t2, binop(Iop_ShlN8x16,
+                                      binop(Iop_64HLtoV128,
+                                            mkU64(tmp), mkU64(tmp)), mkU8(m)));
+                     break;
+                  }
+
+               case 0x01: { /* BSETI.H */
+                     DIP("BSETI.H w%d, w%d, %d", wd, ws, m);
+                     tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16);
+                     assign(t2, binop(Iop_ShlN16x8,
+                                      binop(Iop_64HLtoV128,
+                                            mkU64(tmp), mkU64(tmp)), mkU8(m)));
+                     break;
+                  }
+
+               case 0x02: { /* BSETI.W */
+                     DIP("BSETI.W w%d, w%d, %d", wd, ws, m);
+                     tmp |= (tmp << 32);
+                     assign(t2, binop(Iop_ShlN32x4,
+                                      binop(Iop_64HLtoV128,
+                                            mkU64(tmp), mkU64(tmp)), mkU8(m)));
+                     break;
+                  }
+
+               case 0x03: { /* BSETI.D */
+                     DIP("BSETI.D w%d, w%d, %d", wd, ws, m);
+                     assign(t2, binop(Iop_ShlN64x2,
+                                      binop(Iop_64HLtoV128,
+                                            mkU64(tmp), mkU64(tmp)), mkU8(m)));
+                     break;
+                  }
+            }
+
+            assign(t3, binop(Iop_OrV128, mkexpr(t1), mkexpr(t2)));
+            putWReg(wd, mkexpr(t3));
+            break;
+         }
+
+      case 0x05: { /* BNEGI.df */
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            t3 = newTemp(Ity_V128);
+            ULong tmp = 1;
+            assign(t1, getWReg(ws));
+
+            switch (df) {
+               case 0x00: { /* BNEGI.B */
+                     DIP("BNEGI.B w%d, w%d, %d", wd, ws, m);
+                     tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) |
+                            (tmp << 32) | (tmp << 24) | (tmp << 16) |
+                            (tmp << 8);
+                     assign(t2, binop(Iop_ShlN8x16,
+                                      binop(Iop_64HLtoV128,
+                                            mkU64(tmp), mkU64(tmp)), mkU8(m)));
+                     break;
+                  }
+
+               case 0x01: { /* BNEGI.H */
+                     DIP("BNEGI.H w%d, w%d, %d", wd, ws, m);
+                     tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16);
+                     assign(t2, binop(Iop_ShlN16x8,
+                                      binop(Iop_64HLtoV128,
+                                            mkU64(tmp), mkU64(tmp)), mkU8(m)));
+                     break;
+                  }
+
+               case 0x02: { /* BNEGI.W */
+                     DIP("BNEGI.W w%d, w%d, %d", wd, ws, m);
+                     tmp |= (tmp << 32);
+                     assign(t2, binop(Iop_ShlN32x4,
+                                      binop(Iop_64HLtoV128,
+                                            mkU64(tmp), mkU64(tmp)), mkU8(m)));
+                     break;
+                  }
+
+               case 0x03: { /* BNEGI.D */
+                     DIP("BNEGI.D w%d, w%d, %d", wd, ws, m);
+                     assign(t2, binop(Iop_ShlN64x2,
+                                      binop(Iop_64HLtoV128,
+                                            mkU64(tmp), mkU64(tmp)), mkU8(m)));
+                     break;
+                  }
+            }
+
+            assign(t3, binop(Iop_XorV128, mkexpr(t1), mkexpr(t2)));
+            putWReg(wd, mkexpr(t3));
+            break;
+         }
+
+      case 0x06: { /* BINSLI.df */
+            switch (df) {
+               case 0x00: { /* BINSLI.B */
+                     DIP("BINSLI.B w%d, w%d, w%d", wd, ws, m);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     ULong tmp = 0x8080808080808080ULL;
+                     assign(t1, binop(Iop_SarN8x16,
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(tmp), mkU64(tmp)), mkU8(m)));
+                     assign(t2,
+                            binop(Iop_AndV128,
+                                  unop(Iop_NotV128, mkexpr(t1)), getWReg(wd)));
+                     assign(t3,
+                            binop(Iop_AndV128,
+                                  mkexpr(t1), getWReg(ws)));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   mkexpr(t2), mkexpr(t3)));
+                     break;
+                  }
+
+               case 0x01: { /* BINSLI.H */
+                     DIP("BINSLI.H w%d, w%d, w%d", wd, ws, m);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     ULong tmp = 0x8000800080008000ULL;
+                     assign(t1,
+                            binop(Iop_SarN16x8,
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(tmp), mkU64(tmp)), mkU8(m)));
+                     assign(t2,
+                            binop(Iop_AndV128,
+                                  unop(Iop_NotV128, mkexpr(t1)), getWReg(wd)));
+                     assign(t3,
+                            binop(Iop_AndV128,
+                                  mkexpr(t1), getWReg(ws)));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   mkexpr(t2), mkexpr(t3)));
+                     break;
+                  }
+
+               case 0x02: { /* BINSLI.W */
+                     DIP("BINSLI.W w%d, w%d, w%d", wd, ws, m);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     ULong tmp = 0x8000000080000000ULL;
+                     assign(t1,
+                            binop(Iop_SarN32x4,
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(tmp), mkU64(tmp)), mkU8(m)));
+                     assign(t2,
+                            binop(Iop_AndV128,
+                                  unop(Iop_NotV128, mkexpr(t1)), getWReg(wd)));
+                     assign(t3,
+                            binop(Iop_AndV128,
+                                  mkexpr(t1), getWReg(ws)));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   mkexpr(t2), mkexpr(t3)));
+                     break;
+                  }
+
+               case 0x03: { /* BINSLI.D */
+                     DIP("BINSLI.D w%d, w%d, w%d", wd, ws, m);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     ULong tmp = 0x8000000000000000ULL;
+                     assign(t1,
+                            binop(Iop_SarN64x2,
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(tmp), mkU64(tmp)), mkU8(m)));
+                     assign(t2,
+                            binop(Iop_AndV128,
+                                  unop(Iop_NotV128, mkexpr(t1)), getWReg(wd)));
+                     assign(t3,
+                            binop(Iop_AndV128,
+                                  mkexpr(t1), getWReg(ws)));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   mkexpr(t2), mkexpr(t3)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x07: {
+            switch (df) {
+               case 0x00: { /* BINSRI.B */
+                     DIP("BINSRI.B w%d, w%d, w%d", wd, ws, m);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     ULong tmp = 0xFEFEFEFEFEFEFEFEULL;
+                     assign(t1,
+                            binop(Iop_ShlN8x16,
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(tmp), mkU64(tmp)), mkU8(m)));
+                     assign(t2,
+                            binop(Iop_AndV128,
+                                  unop(Iop_NotV128, mkexpr(t1)), getWReg(ws)));
+                     assign(t3,
+                            binop(Iop_AndV128,
+                                  mkexpr(t1), getWReg(wd)));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   mkexpr(t2), mkexpr(t3)));
+                     break;
+                  }
+
+               case 0x01: { /* BINSRI.H */
+                     DIP("BINSRI.H w%d, w%d, w%d", wd, ws, m);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     ULong tmp = 0xFFFEFFFEFFFEFFFEULL;
+                     assign(t1,
+                            binop(Iop_ShlN16x8,
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(tmp), mkU64(tmp)),
+                                  mkU8(m)));
+                     assign(t2,
+                            binop(Iop_AndV128,
+                                  unop(Iop_NotV128, mkexpr(t1)),
+                                  getWReg(ws)));
+                     assign(t3,
+                            binop(Iop_AndV128,
+                                  mkexpr(t1), getWReg(wd)));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   mkexpr(t2), mkexpr(t3)));
+                     break;
+                  }
+
+               case 0x02: { /* BINSRI.W */
+                     DIP("BINSRI.W w%d, w%d, w%d", wd, ws, m);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     ULong tmp = 0xFFFFFFFEFFFFFFFEULL;
+                     assign(t1,
+                            binop(Iop_ShlN32x4,
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(tmp), mkU64(tmp)),
+                                  mkU8(m)));
+                     assign(t2,
+                            binop(Iop_AndV128,
+                                  unop(Iop_NotV128, mkexpr(t1)),
+                                  getWReg(ws)));
+                     assign(t3,
+                            binop(Iop_AndV128,
+                                  mkexpr(t1), getWReg(wd)));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   mkexpr(t2), mkexpr(t3)));
+                     break;
+                  }
+
+               case 0x03: { /* BINSRI.D */
+                     DIP("BINSRI.D w%d, w%d, w%d", wd, ws, m);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     ULong tmp = -2;
+                     assign(t1,
+                            binop(Iop_ShlN64x2,
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(tmp), mkU64(tmp)),
+                                  mkU8(m)));
+                     assign(t2,
+                            binop(Iop_AndV128,
+                                  unop(Iop_NotV128, mkexpr(t1)),
+                                  getWReg(ws)));
+                     assign(t3,
+                            binop(Iop_AndV128,
+                                  mkexpr(t1), getWReg(wd)));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   mkexpr(t2), mkexpr(t3)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      default:
+         return -1;
+   }
+
+   return 0;
+}
+
+/* Translate the MSA BIT-format instructions with minor opcode 0x0A:
+   SAT_S.df, SAT_U.df, SRARI.df and SRLRI.df.
+
+   cins  the instruction word; bits 25..23 select the operation and
+         bits 22..16 hold the combined df/m field
+   wd    number of the destination vector register
+   ws    number of the source vector register
+
+   Returns 0 once IR for the instruction has been emitted, or -1 if the
+   operation field is not one of the four values handled here. */
+static Int msa_BIT_0A(UInt cins, UChar wd, UChar ws) { /* BIT (0x0A) */
+   /* Lane-wise IROps and mnemonic suffix for every data format, indexed
+      by the decoded df (0 = B, 1 = H, 2 = W, 3 = D). */
+   static const IROp opSar[4]    = { Iop_SarN8x16,   Iop_SarN16x8,
+                                     Iop_SarN32x4,   Iop_SarN64x2 };
+   static const IROp opShr[4]    = { Iop_ShrN8x16,   Iop_ShrN16x8,
+                                     Iop_ShrN32x4,   Iop_ShrN64x2 };
+   static const IROp opShl[4]    = { Iop_ShlN8x16,   Iop_ShlN16x8,
+                                     Iop_ShlN32x4,   Iop_ShlN64x2 };
+   static const IROp opAdd[4]    = { Iop_Add8x16,    Iop_Add16x8,
+                                     Iop_Add32x4,    Iop_Add64x2 };
+   static const IROp opCmpEQ[4]  = { Iop_CmpEQ8x16,  Iop_CmpEQ16x8,
+                                     Iop_CmpEQ32x4,  Iop_CmpEQ64x2 };
+   static const IROp opCmpGTS[4] = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8,
+                                     Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
+   static const HChar dfName[4]  = { 'B', 'H', 'W', 'D' };
+
+   IRTemp t1, t2;
+   UShort operation;
+   UChar df, m;
+   UInt width;   /* lane width in bits: 8, 16, 32 or 64 */
+
+   operation = (cins & 0x03800000) >> 23;
+   df = (cins & 0x007F0000) >> 16;
+
+   /* The leading one-bits of the df/m field select the data format; the
+      remaining low bits are the immediate m.
+      NOTE(review): for the byte format bit 3 of the field is ignored
+      here (only bits 2..0 are used as m) -- confirm against the MSA
+      encoding tables whether 1111mmm should be rejected instead. */
+   if ((df & 0x70) == 0x70) {        /* 111xmmm; b */
+      m = df & 0x07;
+      df = 0;
+   } else if ((df & 0x60) == 0x60) { /* 110mmmm; h */
+      m = df & 0x0F;
+      df = 1;
+   } else if ((df & 0x40) == 0x40) { /* 10mmmmm; w */
+      m = df & 0x1F;
+      df = 2;
+   } else {                          /* 0mmmmmm; d */
+      m = df & 0x3F;
+      df = 3;
+   }
+
+   width = 8u << df;
+
+   switch (operation) {
+      case 0x00: { /* SAT_S.df */
+            DIP("SAT_S.%c w%d, w%d, %d", dfName[df], wd, ws, m);
+            /* t1: each lane filled with copies of its own sign bit. */
+            t1 = newTemp(Ity_V128);
+            assign(t1, binop(opSar[df], getWReg(ws), mkU8(width - 1)));
+
+            if (m == 0) {
+               /* Saturating to a single bit leaves only 0 or -1. */
+               putWReg(wd, mkexpr(t1));
+            } else {
+               /* t2: the bits above the m low bits, sign extended.  A
+                  lane is in range exactly when t2 equals t1. */
+               t2 = newTemp(Ity_V128);
+               assign(t2, binop(opSar[df], getWReg(ws), mkU8(m)));
+               putWReg(wd,
+                       binop(Iop_OrV128,
+                             binop(Iop_OrV128,
+                                   /* in range: keep the source lane */
+                                   binop(Iop_AndV128,
+                                         binop(opCmpEQ[df],
+                                               mkexpr(t1),
+                                               mkexpr(t2)),
+                                         getWReg(ws)),
+                                   /* too small: all-ones << m */
+                                   binop(opShl[df],
+                                         binop(opCmpGTS[df],
+                                               mkexpr(t1),
+                                               mkexpr(t2)),
+                                         mkU8(m))),
+                             /* too large: all-ones >> (width - m) */
+                             binop(opShr[df],
+                                   binop(opCmpGTS[df],
+                                         mkexpr(t2),
+                                         mkexpr(t1)),
+                                   mkU8(width - m))));
+            }
+
+            break;
+         }
+
+      case 0x01: { /* SAT_U.df */
+            DIP("SAT_U.%c w%d, w%d, %d", dfName[df], wd, ws, m);
+
+            if (m == width - 1) {
+               /* Saturating to the full lane width changes nothing. */
+               putWReg(wd, getWReg(ws));
+            } else {
+               /* t1: all-ones in lanes with no bit set above bit m. */
+               t1 = newTemp(Ity_V128);
+               assign(t1,
+                      binop(opCmpEQ[df],
+                            binop(opShr[df],
+                                  getWReg(ws),
+                                  mkU8(m + 1)),
+                            binop(Iop_64HLtoV128,
+                                  mkU64(0), mkU64(0))));
+               putWReg(wd,
+                       binop(Iop_OrV128,
+                             /* in range: keep the source lane */
+                             binop(Iop_AndV128,
+                                   mkexpr(t1),
+                                   getWReg(ws)),
+                             /* too large: m + 1 low bits set */
+                             binop(opShr[df],
+                                   unop(Iop_NotV128,
+                                        mkexpr(t1)),
+                                   mkU8(width - 1 - m))));
+            }
+
+            break;
+         }
+
+      case 0x02: { /* SRARI.df */
+            DIP("SRARI.%c w%d, w%d, %d", dfName[df], wd, ws, m);
+            t1 = newTemp(Ity_V128);
+            assign(t1, binop(opSar[df], getWReg(ws), mkU8(m)));
+
+            if (m) {
+               /* t2: bit m-1 of each source lane, i.e. the last bit
+                  shifted out, moved down to bit 0.  Only built for
+                  m != 0 so that no shift by the full lane width is
+                  ever emitted. */
+               t2 = newTemp(Ity_V128);
+               assign(t2,
+                      binop(opShr[df],
+                            binop(opShl[df],
+                                  getWReg(ws),
+                                  mkU8(width - m)),
+                            mkU8(width - 1)));
+               putWReg(wd,
+                       binop(opAdd[df], mkexpr(t1), mkexpr(t2)));
+            } else {
+               putWReg(wd, mkexpr(t1));
+            }
+
+            break;
+         }
+
+      case 0x03: { /* SRLRI.df */
+            DIP("SRLRI.%c w%d, w%d, %d", dfName[df], wd, ws, m);
+            t1 = newTemp(Ity_V128);
+            assign(t1, binop(opShr[df], getWReg(ws), mkU8(m)));
+
+            if (m) {
+               /* t2: the last bit shifted out, as for SRARI above in
+                  this function; skipped entirely when m == 0. */
+               t2 = newTemp(Ity_V128);
+               assign(t2,
+                      binop(opShr[df],
+                            binop(opShl[df],
+                                  getWReg(ws),
+                                  mkU8(width - m)),
+                            mkU8(width - 1)));
+               putWReg(wd,
+                       binop(opAdd[df], mkexpr(t1), mkexpr(t2)));
+            } else {
+               putWReg(wd, mkexpr(t1));
+            }
+
+            break;
+         }
+
+      default:
+         return -1;
+   }
+
+   return 0;
+}
+
+/* Build a V128 constant expression whose high and low 64-bit halves are
+   both 'pattern'. */
+static IRExpr *msa_3R_0D_splat64(ULong pattern) {
+   return binop(Iop_64HLtoV128, mkU64(pattern), mkU64(pattern));
+}
+
+/* Emit IR for wd = op(ws, wt), where 'op' is a two-operand V128 IR op.
+   Used for SLL.df, SRA.df and SRL.df. */
+static void msa_3R_0D_lanewise(IROp op, UChar wd, UChar ws, UChar wt) {
+   IRTemp src = newTemp(Ity_V128);
+   IRTemp amt = newTemp(Ity_V128);
+   IRTemp res = newTemp(Ity_V128);
+
+   assign(src, getWReg(ws));
+   assign(amt, getWReg(wt));
+   assign(res, binop(op, mkexpr(src), mkexpr(amt)));
+   putWReg(wd, mkexpr(res));
+}
+
+/* Emit IR for the single-bit operations BCLR.df, BSET.df and BNEG.df.
+
+   A mask is formed by applying 'shl' to a constant holding 'lane_one'
+   (the value 1 replicated into every lane of a 64-bit half) and wt.
+   The result written to wd is merge(ws, mask), or merge(ws, ~mask) when
+   'invert_mask' is True (BCLR uses AndV128 with the inverted mask). */
+static void msa_3R_0D_bit(IROp merge, Bool invert_mask, IROp shl,
+                          ULong lane_one, UChar wd, UChar ws, UChar wt) {
+   IRTemp src  = newTemp(Ity_V128);
+   IRTemp mask = newTemp(Ity_V128);
+   IRTemp res  = newTemp(Ity_V128);
+   IRExpr *sel;
+
+   assign(src, getWReg(ws));
+   assign(mask, binop(shl, msa_3R_0D_splat64(lane_one), getWReg(wt)));
+
+   sel = mkexpr(mask);
+
+   if (invert_mask)
+      sel = unop(Iop_NotV128, sel);
+
+   assign(res, binop(merge, mkexpr(src), sel));
+   putWReg(wd, mkexpr(res));
+}
+
+/* Emit IR for the bit-insert operations BINSL.df and BINSR.df.
+
+   mask = shift(seed replicated into both 64-bit halves, wt)
+   wd   = (~mask & reg[outside]) | (mask & reg[inside])
+
+   'inside' is the register supplying the bits where mask is 1 and
+   'outside' the register supplying the bits where mask is 0.  BINSL
+   passes inside = ws, outside = wd; BINSR passes the reverse. */
+static void msa_3R_0D_insert(IROp shift, ULong seed, UChar wd, UChar wt,
+                             UChar inside, UChar outside) {
+   IRTemp mask     = newTemp(Ity_V128);
+   IRTemp from_out = newTemp(Ity_V128);
+   IRTemp from_in  = newTemp(Ity_V128);
+
+   assign(mask, binop(shift, msa_3R_0D_splat64(seed), getWReg(wt)));
+   assign(from_out,
+          binop(Iop_AndV128,
+                unop(Iop_NotV128, mkexpr(mask)), getWReg(outside)));
+   assign(from_in,
+          binop(Iop_AndV128, mkexpr(mask), getWReg(inside)));
+   putWReg(wd, binop(Iop_OrV128, mkexpr(from_out), mkexpr(from_in)));
+}
+
+/* Decode one MSA 3R-format instruction of group 0x0D and emit its IR.
+
+   cins   - instruction word.  Bits 25..23 select the operation, bits
+            22..21 the data format df (0 = .B, 1 = .H, 2 = .W, 3 = .D)
+            and bits 20..16 the second source register wt.
+   wd, ws - destination and first source register numbers.
+
+   Returns 0 when IR has been generated and -1 when the encoding is not
+   recognised. */
+static Int msa_3R_0D(UInt cins, UChar wd, UChar ws) { /* 3R (0x0D) */
+   /* Tables indexed by df. */
+   static const IROp shl_op[4] = {
+      Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4, Iop_Shl64x2
+   };
+   static const IROp sar_op[4] = {
+      Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2
+   };
+   static const IROp shr_op[4] = {
+      Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2
+   };
+   /* Only the lowest bit of every lane set. */
+   static const ULong lane_lsb[4] = {
+      0x0101010101010101ULL, 0x0001000100010001ULL,
+      0x0000000100000001ULL, 0x0000000000000001ULL
+   };
+   /* Only the highest bit of every lane set. */
+   static const ULong lane_msb[4] = {
+      0x8080808080808080ULL, 0x8000800080008000ULL,
+      0x8000000080000000ULL, 0x8000000000000000ULL
+   };
+   /* Every bit except the lowest bit of every lane set. */
+   static const ULong lane_not_lsb[4] = {
+      0xFEFEFEFEFEFEFEFEULL, 0xFFFEFFFEFFFEFFFEULL,
+      0xFFFFFFFEFFFFFFFEULL, 0xFFFFFFFFFFFFFFFEULL
+   };
+   UShort operation;
+   UChar df, wt;
+
+   operation = (cins & 0x03800000) >> 23;
+   df = (cins & 0x00600000) >> 21;
+   wt = (cins & 0x001F0000) >> 16;
+
+   /* df is a two-bit field, so the inner 'default' arms below cannot be
+      reached; they are kept so every operation rejects a bad df the same
+      way before any table is indexed. */
+   switch (operation) {
+      case 0x00: /* SLL.df */
+         switch (df) {
+            case 0x00:
+               DIP("SLL.B w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x01:
+               DIP("SLL.H w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x02:
+               DIP("SLL.W w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x03:
+               DIP("SLL.D w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            default:
+               return -1;
+         }
+
+         msa_3R_0D_lanewise(shl_op[df], wd, ws, wt);
+         break;
+
+      case 0x01: /* SRA.df */
+         switch (df) {
+            case 0x00:
+               DIP("SRA.B w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x01:
+               DIP("SRA.H w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x02:
+               DIP("SRA.W w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x03:
+               DIP("SRA.D w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            default:
+               return -1;
+         }
+
+         msa_3R_0D_lanewise(sar_op[df], wd, ws, wt);
+         break;
+
+      case 0x02: /* SRL.df */
+         switch (df) {
+            case 0x00:
+               DIP("SRL.B w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x01:
+               DIP("SRL.H w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x02:
+               DIP("SRL.W w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x03:
+               DIP("SRL.D w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            default:
+               return -1;
+         }
+
+         msa_3R_0D_lanewise(shr_op[df], wd, ws, wt);
+         break;
+
+      case 0x03: /* BCLR.df */
+         switch (df) {
+            case 0x00:
+               DIP("BCLR.B w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x01:
+               DIP("BCLR.H w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x02:
+               DIP("BCLR.W w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x03:
+               DIP("BCLR.D w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            default:
+               return -1;
+         }
+
+         /* wd = ws & ~(1 << wt) */
+         msa_3R_0D_bit(Iop_AndV128, True, shl_op[df], lane_lsb[df],
+                       wd, ws, wt);
+         break;
+
+      case 0x04: /* BSET.df */
+         switch (df) {
+            case 0x00:
+               DIP("BSET.B w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x01:
+               DIP("BSET.H w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x02:
+               DIP("BSET.W w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x03:
+               DIP("BSET.D w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            default:
+               return -1;
+         }
+
+         /* wd = ws | (1 << wt) */
+         msa_3R_0D_bit(Iop_OrV128, False, shl_op[df], lane_lsb[df],
+                       wd, ws, wt);
+         break;
+
+      case 0x05: /* BNEG.df */
+         switch (df) {
+            case 0x00:
+               DIP("BNEG.B w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x01:
+               DIP("BNEG.H w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x02:
+               DIP("BNEG.W w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x03:
+               DIP("BNEG.D w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            default:
+               return -1;
+         }
+
+         /* wd = ws ^ (1 << wt) */
+         msa_3R_0D_bit(Iop_XorV128, False, shl_op[df], lane_lsb[df],
+                       wd, ws, wt);
+         break;
+
+      case 0x06: /* BINSL.df */
+         switch (df) {
+            case 0x00:
+               DIP("BINSL.B w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x01:
+               DIP("BINSL.H w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x02:
+               DIP("BINSL.W w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x03:
+               DIP("BINSL.D w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            default:
+               return -1;
+         }
+
+         /* mask = Sar(top bit of each lane, wt); masked bits come from
+            ws, the remaining bits are kept from wd. */
+         msa_3R_0D_insert(sar_op[df], lane_msb[df], wd, wt, ws, wd);
+         break;
+
+      case 0x07: /* BINSR.df */
+         switch (df) {
+            case 0x00:
+               DIP("BINSR.B w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x01:
+               DIP("BINSR.H w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x02:
+               DIP("BINSR.W w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            case 0x03:
+               DIP("BINSR.D w%d, w%d, w%d", wd, ws, wt);
+               break;
+
+            default:
+               return -1;
+         }
+
+         /* mask = Shl(all bits but the lowest of each lane, wt); masked
+            bits are kept from wd, the remaining bits come from ws. */
+         msa_3R_0D_insert(shl_op[df], lane_not_lsb[df], wd, wt, wd, ws);
+         break;
+
+      default:
+         return -1;
+   }
+
+   return 0;
+}
+
+static Int msa_3R_0E(UInt cins, UChar wd, UChar ws) { /* 3R (0x0E) */
+   IRTemp t1, t2, t3, t4;
+   UShort operation;
+   UChar df, wt;
+
+   operation = (cins & 0x03800000) >> 23;
+   df = (cins & 0x00600000) >> 21;
+   wt = (cins & 0x001F0000) >> 16;
+
+   switch (operation) {
+      case 0x00: { /* ADDV.df */
+            switch (df) {
+               case 0x00: { /* ADDV.B */
+                     DIP("ADDV.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Add8x16, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* ADDV.H */
+                     DIP("ADDV.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Add16x8, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* ADDV.W */
+                     DIP("ADDV.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Add32x4, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* ADDV.D */
+                     DIP("ADDV.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Add64x2, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x01: { /* SUBV.df */
+            switch (df) {
+               case 0x00: { /* SUBV.B */
+                     DIP("SUBV.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Sub8x16, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* SUBV.H */
+                     DIP("SUBV.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Sub16x8, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* SUBV.W */
+                     DIP("SUBV.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Sub32x4, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* SUBV.D */
+                     DIP("SUBV.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Sub64x2, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x02: { /* MAX_S.df */
+            switch (df) {
+               case 0x00: { /* MAX_S.B */
+                     DIP("MAX_S.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Max8Sx16, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* MAX_S.H */
+                     DIP("MAX_S.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Max16Sx8, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* MAX_S.W */
+                     DIP("MAX_S.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Max32Sx4, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* MAX_S.D */
+                     DIP("MAX_S.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Max64Sx2, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x03: { /* MAX_U.df */
+            switch (df) {
+               case 0x00: { /* MAX_U.B */
+                     DIP("MAX_U.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Max8Ux16, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* MAX_U.H */
+                     DIP("MAX_U.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Max16Ux8, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* MAX_U.W */
+                     DIP("MAX_U.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Max32Ux4, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* MAX_U.D */
+                     DIP("MAX_U.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Max64Ux2, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x04: { /* MIN_S.df */
+            switch (df) {
+               case 0x00: { /* MIN_S.B */
+                     DIP("MIN_S.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Min8Sx16, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* MIN_S.H */
+                     DIP("MIN_S.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Min16Sx8, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* MIN_S.W */
+                     DIP("MIN_S.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Min32Sx4, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* MIN_S.D */
+                     DIP("MIN_S.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Min64Sx2, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x05: { /* MIN_U.df */
+            switch (df) {
+               case 0x00: { /* MIN_U.B */
+                     DIP("MIN_U.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Min8Ux16, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* MIN_U.H */
+                     DIP("MIN_U.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Min16Ux8, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* MIN_U.W */
+                     DIP("MIN_U.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Min32Ux4, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* MIN_U.D */
+                     DIP("MIN_U.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Min64Ux2, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x06: { /* MAX_A.df */
+            switch (df) {
+               case 0x00: { /* MAX_A.B */
+                     DIP("MAX_A.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1, unop(Iop_Abs8x16, getWReg(ws)));
+                     assign(t2, unop(Iop_Abs8x16, getWReg(wt)));
+                     assign(t4, binop(Iop_CmpGT8Ux16, mkexpr(t1), mkexpr(t2)));
+                     assign(t3, binop(Iop_OrV128,
+                                      binop(Iop_AndV128,
+                                            mkexpr(t4),
+                                            getWReg(ws)),
+                                      binop(Iop_AndV128,
+                                            unop(Iop_NotV128, mkexpr(t4)),
+                                            getWReg(wt))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* MAX_A.H */
+                     DIP("MAX_A.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1, unop(Iop_Abs16x8, getWReg(ws)));
+                     assign(t2, unop(Iop_Abs16x8, getWReg(wt)));
+                     assign(t4, binop(Iop_CmpGT16Ux8, mkexpr(t1), mkexpr(t2)));
+                     assign(t3, binop(Iop_OrV128,
+                                      binop(Iop_AndV128,
+                                            mkexpr(t4),
+                                            getWReg(ws)),
+                                      binop(Iop_AndV128,
+                                            unop(Iop_NotV128, mkexpr(t4)),
+                                            getWReg(wt))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* MAX_A.W */
+                     DIP("MAX_A.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1, unop(Iop_Abs32x4, getWReg(ws)));
+                     assign(t2, unop(Iop_Abs32x4, getWReg(wt)));
+                     assign(t4, binop(Iop_CmpGT32Ux4, mkexpr(t1), mkexpr(t2)));
+                     assign(t3, binop(Iop_OrV128,
+                                      binop(Iop_AndV128,
+                                            mkexpr(t4),
+                                            getWReg(ws)),
+                                      binop(Iop_AndV128,
+                                            unop(Iop_NotV128, mkexpr(t4)),
+                                            getWReg(wt))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* MAX_A.D */
+                     DIP("MAX_A.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1, unop(Iop_Abs64x2, getWReg(ws)));
+                     assign(t2, unop(Iop_Abs64x2, getWReg(wt)));
+                     assign(t4, binop(Iop_CmpGT64Ux2, mkexpr(t1), mkexpr(t2)));
+                     assign(t3, binop(Iop_OrV128,
+                                      binop(Iop_AndV128,
+                                            mkexpr(t4),
+                                            getWReg(ws)),
+                                      binop(Iop_AndV128,
+                                            unop(Iop_NotV128, mkexpr(t4)),
+                                            getWReg(wt))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x07: { /* MIN_A.df */
+            switch (df) {
+               case 0x00: { /* MIN_A.B */
+                     DIP("MIN_A.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1, unop(Iop_Abs8x16, getWReg(ws)));
+                     assign(t2, unop(Iop_Abs8x16, getWReg(wt)));
+                     assign(t4, binop(Iop_OrV128,
+                                      binop(Iop_CmpGT8Ux16,
+                                            mkexpr(t1), mkexpr(t2)),
+                                      binop(Iop_CmpEQ8x16,
+                                            mkexpr(t1), mkexpr(t2))));
+                     assign(t3, binop(Iop_OrV128,
+                                      binop(Iop_AndV128,
+                                            mkexpr(t4),
+                                            getWReg(wt)),
+                                      binop(Iop_AndV128,
+                                            unop(Iop_NotV128, mkexpr(t4)),
+                                            getWReg(ws))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* MIN_A.H */
+                     DIP("MIN_A.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1, unop(Iop_Abs16x8, getWReg(ws)));
+                     assign(t2, unop(Iop_Abs16x8, getWReg(wt)));
+                     assign(t4, binop(Iop_OrV128,
+                                      binop(Iop_CmpGT16Ux8,
+                                            mkexpr(t1), mkexpr(t2)),
+                                      binop(Iop_CmpEQ16x8,
+                                            mkexpr(t1), mkexpr(t2))));
+                     assign(t3, binop(Iop_OrV128,
+                                      binop(Iop_AndV128,
+                                            mkexpr(t4),
+                                            getWReg(wt)),
+                                      binop(Iop_AndV128,
+                                            unop(Iop_NotV128, mkexpr(t4)),
+                                            getWReg(ws))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* MIN_A.W */
+                     DIP("MIN_A.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1, unop(Iop_Abs32x4, getWReg(ws)));
+                     assign(t2, unop(Iop_Abs32x4, getWReg(wt)));
+                     assign(t4, binop(Iop_OrV128,
+                                      binop(Iop_CmpGT32Ux4,
+                                            mkexpr(t1), mkexpr(t2)),
+                                      binop(Iop_CmpEQ32x4,
+                                            mkexpr(t1), mkexpr(t2))));
+                     assign(t3, binop(Iop_OrV128,
+                                      binop(Iop_AndV128,
+                                            mkexpr(t4),
+                                            getWReg(wt)),
+                                      binop(Iop_AndV128,
+                                            unop(Iop_NotV128, mkexpr(t4)),
+                                            getWReg(ws))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* MIN_A.D */
+                     DIP("MIN_A.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1, unop(Iop_Abs64x2, getWReg(ws)));
+                     assign(t2, unop(Iop_Abs64x2, getWReg(wt)));
+                     assign(t4, binop(Iop_OrV128,
+                                      binop(Iop_CmpGT64Ux2,
+                                            mkexpr(t1), mkexpr(t2)),
+                                      binop(Iop_CmpEQ64x2,
+                                            mkexpr(t1), mkexpr(t2))));
+                     assign(t3, binop(Iop_OrV128,
+                                      binop(Iop_AndV128,
+                                            mkexpr(t4),
+                                            getWReg(wt)),
+                                      binop(Iop_AndV128,
+                                            unop(Iop_NotV128, mkexpr(t4)),
+                                            getWReg(ws))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      default:
+         return -1;
+   }
+
+   return 0;
+}
+
+static Int msa_3R_0F(UInt cins, UChar wd, UChar ws) { /* 3R (0x0F) */
+   /* Translates the integer compare instructions of this 3R group
+      (CEQ, CLT_S, CLT_U, CLE_S, CLE_U; formats .B/.H/.W/.D) into IR.
+
+      cins  raw instruction word; the operation, data format and wt
+            register number are extracted from it below.
+      wd    destination vector register number.
+      ws    first source vector register number.
+
+      Returns 0 once the IR has been emitted, or -1 (with nothing
+      emitted) when the operation field is not one handled here. */
+
+   /* Lane-wise compare ops, indexed by the data format field:
+      0 = .B, 1 = .H, 2 = .W, 3 = .D. */
+   static const IROp eq_ops[4]   = { Iop_CmpEQ8x16,  Iop_CmpEQ16x8,
+                                     Iop_CmpEQ32x4,  Iop_CmpEQ64x2 };
+   static const IROp gt_signed[4]   = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8,
+                                        Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
+   static const IROp gt_unsigned[4] = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8,
+                                        Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
+
+   const UInt  opc = (cins >> 23) & 0x7;    /* bits 25..23 */
+   const UInt  df  = (cins >> 21) & 0x3;    /* bits 22..21 */
+   const UChar wt  = (cins >> 16) & 0x1F;   /* bits 20..16 */
+
+   const IROp* gt_ops = gt_signed;  /* only consulted when want_gt is set */
+   Bool want_gt = True;             /* include the (wt > ws) comparison  */
+   Bool want_eq = False;            /* include the (ws == wt) comparison */
+   IRTemp lhs, rhs, res;
+   IRExpr* cmp;
+
+   /* Stage 1: validate the encoding, print the disassembly and pick
+      which comparisons make up the result. */
+   switch (opc) {
+      case 0x00:   /* CEQ.df */
+         want_gt = False;
+         want_eq = True;
+
+         switch (df) {
+            case 0x00: DIP("CEQ.B w%d, w%d, w%d", wd, ws, wt); break;
+            case 0x01: DIP("CEQ.H w%d, w%d, w%d", wd, ws, wt); break;
+            case 0x02: DIP("CEQ.W w%d, w%d, w%d", wd, ws, wt); break;
+            case 0x03: DIP("CEQ.D w%d, w%d, w%d", wd, ws, wt); break;
+            default:   return -1;
+         }
+
+         break;
+
+      case 0x02:   /* CLT_S.df */
+         switch (df) {
+            case 0x00: DIP("CLT_S.B w%d, w%d, w%d", wd, ws, wt); break;
+            case 0x01: DIP("CLT_S.H w%d, w%d, w%d", wd, ws, wt); break;
+            case 0x02: DIP("CLT_S.W w%d, w%d, w%d", wd, ws, wt); break;
+            case 0x03: DIP("CLT_S.D w%d, w%d, w%d", wd, ws, wt); break;
+            default:   return -1;
+         }
+
+         break;
+
+      case 0x03:   /* CLT_U.df */
+         gt_ops = gt_unsigned;
+
+         switch (df) {
+            case 0x00: DIP("CLT_U.B w%d, w%d, w%d", wd, ws, wt); break;
+            case 0x01: DIP("CLT_U.H w%d, w%d, w%d", wd, ws, wt); break;
+            case 0x02: DIP("CLT_U.W w%d, w%d, w%d", wd, ws, wt); break;
+            case 0x03: DIP("CLT_U.D w%d, w%d, w%d", wd, ws, wt); break;
+            default:   return -1;
+         }
+
+         break;
+
+      case 0x04:   /* CLE_S.df */
+         want_eq = True;
+
+         switch (df) {
+            case 0x00: DIP("CLE_S.B w%d, w%d, w%d", wd, ws, wt); break;
+            case 0x01: DIP("CLE_S.H w%d, w%d, w%d", wd, ws, wt); break;
+            case 0x02: DIP("CLE_S.W w%d, w%d, w%d", wd, ws, wt); break;
+            case 0x03: DIP("CLE_S.D w%d, w%d, w%d", wd, ws, wt); break;
+            default:   return -1;
+         }
+
+         break;
+
+      case 0x05:   /* CLE_U.df */
+         gt_ops = gt_unsigned;
+         want_eq = True;
+
+         switch (df) {
+            case 0x00: DIP("CLE_U.B w%d, w%d, w%d", wd, ws, wt); break;
+            case 0x01: DIP("CLE_U.H w%d, w%d, w%d", wd, ws, wt); break;
+            case 0x02: DIP("CLE_U.W w%d, w%d, w%d", wd, ws, wt); break;
+            case 0x03: DIP("CLE_U.D w%d, w%d, w%d", wd, ws, wt); break;
+            default:   return -1;
+         }
+
+         break;
+
+      default:
+         return -1;
+   }
+
+   /* Stage 2: snapshot both sources, then build the comparison.
+      Temps are created in the order ws copy, wt copy, result. */
+   lhs = newTemp(Ity_V128);
+   rhs = newTemp(Ity_V128);
+   res = newTemp(Ity_V128);
+   assign(lhs, getWReg(ws));
+   assign(rhs, getWReg(wt));
+
+   if (want_gt) {
+      /* "ws < wt" is expressed as "wt > ws", hence the swapped operands. */
+      cmp = binop(gt_ops[df], mkexpr(rhs), mkexpr(lhs));
+
+      if (want_eq) {
+         /* Less-or-equal: OR the strict comparison with equality. */
+         cmp = binop(Iop_OrV128,
+                     cmp,
+                     binop(eq_ops[df], mkexpr(lhs), mkexpr(rhs)));
+      }
+   } else {
+      cmp = binop(eq_ops[df], mkexpr(lhs), mkexpr(rhs));
+   }
+
+   assign(res, cmp);
+   putWReg(wd, mkexpr(res));
+   return 0;
+}
+
+static Int msa_3R_10(UInt cins, UChar wd, UChar ws) { /* 3R (0x10) */
+   IRTemp t1, t2, t3, t4;
+   UShort operation;
+   UChar df, wt;
+
+   operation = (cins & 0x03800000) >> 23;
+   df = (cins & 0x00600000) >> 21;
+   wt = (cins & 0x001F0000) >> 16;
+
+   switch (operation) {
+      case 0x00: { /* ADD_A.df */
+            switch (df) {
+               case 0x00: { /* ADD_A.B */
+                     DIP("ADD_A.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, unop(Iop_Abs8x16, getWReg(ws)));
+                     assign(t2, unop(Iop_Abs8x16, getWReg(wt)));
+                     assign(t3, binop(Iop_Add8x16, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* ADD_A.H */
+                     DIP("ADD_A.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, unop(Iop_Abs16x8, getWReg(ws)));
+                     assign(t2, unop(Iop_Abs16x8, getWReg(wt)));
+                     assign(t3, binop(Iop_Add16x8, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* ADD_A.W */
+                     DIP("ADD_A.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, unop(Iop_Abs32x4, getWReg(ws)));
+                     assign(t2, unop(Iop_Abs32x4, getWReg(wt)));
+                     assign(t3, binop(Iop_Add32x4, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* ADD_A.D */
+                     DIP("ADD_A.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, unop(Iop_Abs64x2, getWReg(ws)));
+                     assign(t2, unop(Iop_Abs64x2, getWReg(wt)));
+                     assign(t3, binop(Iop_Add64x2, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x01: { /* ADDS_A.df */
+            switch (df) {
+               case 0x00: { /* ADDS_A.B */
+                     DIP("ADDS_A.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1, unop(Iop_Abs8x16, getWReg(ws)));
+                     assign(t2, unop(Iop_Abs8x16, getWReg(wt)));
+                     assign(t3, binop(Iop_SarN8x16,
+                                      binop(Iop_AndV128,
+                                            mkexpr(t1),
+                                            getWReg(ws)),
+                                      mkU8(7)));
+                     assign(t4, binop(Iop_SarN8x16,
+                                      binop(Iop_AndV128,
+                                            mkexpr(t2),
+                                            getWReg(wt)),
+                                      mkU8(7)));
+                     putWReg(wd, binop(Iop_QAdd8Sx16,
+                                       binop(Iop_OrV128,
+                                             binop(Iop_AndV128,
+                                                   unop(Iop_NotV128,
+                                                        mkexpr(t3)),
+                                                   mkexpr(t1)),
+                                             binop(Iop_AndV128,
+                                                   unop(Iop_NotV128,
+                                                        mkexpr(t1)),
+                                                   mkexpr(t3))),
+                                       binop(Iop_OrV128,
+                                             binop(Iop_AndV128,
+                                                   unop(Iop_NotV128,
+                                                        mkexpr(t4)),
+                                                   mkexpr(t2)),
+                                             binop(Iop_AndV128,
+                                                   unop(Iop_NotV128,
+                                                        mkexpr(t2)),
+                                                   mkexpr(t4)))));
+                     break;
+                  }
+
+               case 0x01: { /* ADDS_A.H */
+                     DIP("ADDS_A.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1, unop(Iop_Abs16x8, getWReg(ws)));
+                     assign(t2, unop(Iop_Abs16x8, getWReg(wt)));
+                     assign(t3, binop(Iop_SarN16x8,
+                                      binop(Iop_AndV128,
+                                            mkexpr(t1),
+                                            getWReg(ws)),
+                                      mkU8(15)));
+                     assign(t4, binop(Iop_SarN16x8,
+                                   binop(Iop_AndV128,
+                                         mkexpr(t2),
+                                         getWReg(wt)),
+                                   mkU8(15)));
+                     putWReg(wd, binop(Iop_QAdd16Sx8,
+                                       binop(Iop_OrV128,
+                                             binop(Iop_AndV128,
+                                                   unop(Iop_NotV128,
+                                                        mkexpr(t3)),
+                                                   mkexpr(t1)),
+                                             binop(Iop_AndV128,
+                                                   unop(Iop_NotV128,
+                                                        mkexpr(t1)),
+                                                   mkexpr(t3))),
+                                       binop(Iop_OrV128,
+                                             binop(Iop_AndV128,
+                                                   unop(Iop_NotV128,
+                                                        mkexpr(t4)),
+                                                   mkexpr(t2)),
+                                             binop(Iop_AndV128,
+                                                   unop(Iop_NotV128,
+                                                        mkexpr(t2)),
+                                                   mkexpr(t4)))));
+                     break;
+                  }
+
+               case 0x02: { /* ADDS_A.W */
+                     DIP("ADDS_A.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1, unop(Iop_Abs32x4, getWReg(ws)));
+                     assign(t2, unop(Iop_Abs32x4, getWReg(wt)));
+                     assign(t3, binop(Iop_SarN32x4,
+                                      binop(Iop_AndV128,
+                                            mkexpr(t1),
+                                            getWReg(ws)),
+                                      mkU8(31)));
+                     assign(t4, binop(Iop_SarN32x4,
+                                      binop(Iop_AndV128,
+                                            mkexpr(t2),
+                                            getWReg(wt)),
+                                      mkU8(31)));
+                     putWReg(wd, binop(Iop_QAdd32Sx4,
+                                       binop(Iop_OrV128,
+                                             binop(Iop_AndV128,
+                                                   unop(Iop_NotV128,
+                                                        mkexpr(t3)),
+                                                   mkexpr(t1)),
+                                             binop(Iop_AndV128,
+                                                   unop(Iop_NotV128,
+                                                        mkexpr(t1)),
+                                                   mkexpr(t3))),
+                                       binop(Iop_OrV128,
+                                             binop(Iop_AndV128,
+                                                   unop(Iop_NotV128,
+                                                        mkexpr(t4)),
+                                                   mkexpr(t2)),
+                                             binop(Iop_AndV128,
+                                                   unop(Iop_NotV128,
+                                                        mkexpr(t2)),
+                                                   mkexpr(t4)))));
+                     break;
+                  }
+
+               case 0x03: { /* ADDS_A.D */
+                     DIP("ADDS_A.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1, unop(Iop_Abs64x2, getWReg(ws)));
+                     assign(t2, unop(Iop_Abs64x2, getWReg(wt)));
+                     assign(t3, binop(Iop_SarN64x2,
+                                      binop(Iop_AndV128,
+                                            mkexpr(t1),
+                                            getWReg(ws)),
+                                      mkU8(63)));
+                     assign(t4, binop(Iop_SarN64x2,
+                                      binop(Iop_AndV128,
+                                            mkexpr(t2),
+                                            getWReg(wt)),
+                                      mkU8(63)));
+                     putWReg(wd,
+                             binop(Iop_QAdd64Sx2,
+                                   binop(Iop_OrV128,
+                                         binop(Iop_AndV128,
+                                               unop(Iop_NotV128,
+                                                    mkexpr(t3)),
+                                               mkexpr(t1)),
+                                         binop(Iop_AndV128,
+                                               unop(Iop_NotV128,
+                                                    mkexpr(t1)),
+                                               mkexpr(t3))),
+                                   binop(Iop_OrV128,
+                                         binop(Iop_AndV128,
+                                               unop(Iop_NotV128,
+                                                    mkexpr(t4)),
+                                               mkexpr(t2)),
+                                         binop(Iop_AndV128,
+                                               unop(Iop_NotV128,
+                                                    mkexpr(t2)),
+                                               mkexpr(t4)))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x02: { /* ADDS_S.df */
+            switch (df) {
+               case 0x00: { /* ADDS_S.B */
+                     DIP("ADDS_S.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_QAdd8Sx16, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* ADDS_S.H */
+                     DIP("ADDS_S.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_QAdd16Sx8, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* ADDS_S.W */
+                     DIP("ADDS_S.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_QAdd32Sx4, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* ADDS_S.D */
+                     DIP("ADDS_S.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_QAdd64Sx2, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x03: { /* ADDS_U.df */
+            switch (df) {
+               case 0x00: { /* ADDS_U.B */
+                     DIP("ADDS_U.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_QAdd8Ux16, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* ADDS_U.H */
+                     DIP("ADDS_U.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_QAdd16Ux8, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* ADDS_U.W */
+                     DIP("ADDS_U.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_QAdd32Ux4, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* ADDS_U.D */
+                     DIP("ADDS_U.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_QAdd64Ux2, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x04: { /* AVE_S.df */
+            switch (df) {
+               case 0x00: { /* AVE_S.B */
+                     DIP("AVE_S.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Add8x16,
+                                      binop(Iop_Add8x16,
+                                            binop(Iop_SarN8x16,
+                                                  mkexpr(t1), mkU8(1)),
+                                            binop(Iop_SarN8x16,
+                                                  mkexpr(t2), mkU8(1))),
+                                      binop(Iop_ShrN8x16,
+                                            binop(Iop_ShlN8x16,
+                                                  binop(Iop_AndV128,
+                                                        mkexpr(t1),
+                                                        mkexpr(t2)),
+                                                  mkU8(7)),
+                                            mkU8(7))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* AVE_S.H */
+                     DIP("AVE_S.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_Add16x8,
+                                  binop(Iop_Add16x8,
+                                        binop(Iop_SarN16x8,
+                                              mkexpr(t1), mkU8(1)),
+                                        binop(Iop_SarN16x8,
+                                              mkexpr(t2), mkU8(1))),
+                                  binop(Iop_ShrN16x8,
+                                        binop(Iop_ShlN16x8,
+                                              binop(Iop_AndV128,
+                                                    mkexpr(t1),
+                                                    mkexpr(t2)),
+                                              mkU8(15)),
+                                        mkU8(15))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* AVE_S.W */
+                     DIP("AVE_S.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Add32x4,
+                                      binop(Iop_Add32x4,
+                                            binop(Iop_SarN32x4,
+                                                  mkexpr(t1), mkU8(1)),
+                                            binop(Iop_SarN32x4,
+                                                  mkexpr(t2), mkU8(1))),
+                                      binop(Iop_ShrN32x4,
+                                            binop(Iop_ShlN32x4,
+                                                  binop(Iop_AndV128,
+                                                        mkexpr(t1),
+                                                        mkexpr(t2)),
+                                                  mkU8(31)),
+                                            mkU8(31))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* AVE_S.D */
+                     DIP("AVE_S.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Add64x2,
+                                      binop(Iop_Add64x2,
+                                            binop(Iop_SarN64x2,
+                                                  mkexpr(t1), mkU8(1)),
+                                            binop(Iop_SarN64x2,
+                                                  mkexpr(t2), mkU8(1))),
+                                      binop(Iop_ShrN64x2,
+                                            binop(Iop_ShlN64x2,
+                                                  binop(Iop_AndV128,
+                                                        mkexpr(t1),
+                                                        mkexpr(t2)),
+                                                  mkU8(63)),
+                                            mkU8(63))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x05: { /* AVE_U.df */
+            switch (df) {
+               case 0x00: { /* AVE_U.B */
+                     /* Truncating unsigned average on 8-bit lanes:
+                           (ws >> 1) + (wt >> 1) + (ws & wt & 1)
+                        Each operand is halved before the add, so a lane
+                        sum is at most 0x7F + 0x7F + 1 = 0xFF and never
+                        overflows its byte. */
+                     DIP("AVE_U.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     /* The outer add uses byte lanes too, matching the .B
+                        data format and the inner add.  Since no byte lane
+                        can carry (see above), this yields the same value
+                        as a 16-bit lane add would. */
+                     assign(t3, binop(Iop_Add8x16,
+                                      binop(Iop_Add8x16,
+                                            binop(Iop_ShrN8x16,
+                                                  mkexpr(t1), mkU8(1)),
+                                            binop(Iop_ShrN8x16,
+                                                  mkexpr(t2), mkU8(1))),
+                                      /* Shift left by 7 then right by 7 to
+                                         keep only bit 0 of each byte of
+                                         (ws & wt): the unit lost when both
+                                         operands had their low bit set. */
+                                      binop(Iop_ShrN8x16,
+                                            binop(Iop_ShlN8x16,
+                                                  binop(Iop_AndV128,
+                                                        mkexpr(t1),
+                                                        mkexpr(t2)),
+                                                  mkU8(7)),
+                                            mkU8(7))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* AVE_U.H */
+                     DIP("AVE_U.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Add16x8,
+                                      binop(Iop_Add16x8,
+                                            binop(Iop_ShrN16x8,
+                                                  mkexpr(t1), mkU8(1)),
+                                            binop(Iop_ShrN16x8,
+                                                  mkexpr(t2), mkU8(1))),
+                                      binop(Iop_ShrN16x8,
+                                            binop(Iop_ShlN16x8,
+                                                  binop(Iop_AndV128,
+                                                        mkexpr(t1),
+                                                        mkexpr(t2)),
+                                                  mkU8(15)),
+                                            mkU8(15))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* AVE_U.W */
+                     DIP("AVE_U.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Add32x4,
+                                      binop(Iop_Add32x4,
+                                            binop(Iop_ShrN32x4,
+                                                  mkexpr(t1), mkU8(1)),
+                                            binop(Iop_ShrN32x4,
+                                                  mkexpr(t2), mkU8(1))),
+                                      binop(Iop_ShrN32x4,
+                                            binop(Iop_ShlN32x4,
+                                                  binop(Iop_AndV128,
+                                                        mkexpr(t1),
+                                                        mkexpr(t2)),
+                                                  mkU8(31)),
+                                            mkU8(31))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* AVE_U.D */
+                     DIP("AVE_U.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Add64x2,
+                                      binop(Iop_Add64x2,
+                                            binop(Iop_ShrN64x2,
+                                                  mkexpr(t1), mkU8(1)),
+                                            binop(Iop_ShrN64x2,
+                                                  mkexpr(t2), mkU8(1))),
+                                      binop(Iop_ShrN64x2,
+                                            binop(Iop_ShlN64x2,
+                                                  binop(Iop_AndV128,
+                                                        mkexpr(t1),
+                                                        mkexpr(t2)),
+                                                  mkU8(63)),
+                                            mkU8(63))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x06: { /* AVER_S.df */
+            switch (df) {
+               case 0x00: { /* AVER_S.B */
+                     DIP("AVER_S.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Avg8Sx16, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* AVER_S.H */
+                     DIP("AVER_S.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Avg16Sx8, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* AVER_S.W */
+                     DIP("AVER_S.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Avg32Sx4, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* AVER_S.D */
+                     DIP("AVER_S.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Add64x2,
+                                      binop(Iop_Add64x2,
+                                            binop(Iop_SarN64x2,
+                                                  mkexpr(t1), mkU8(1)),
+                                            binop(Iop_SarN64x2,
+                                                  mkexpr(t2), mkU8(1))),
+                                      binop(Iop_ShrN64x2,
+                                            binop(Iop_ShlN64x2,
+                                                  binop(Iop_OrV128,
+                                                        mkexpr(t1),
+                                                        mkexpr(t2)),
+                                                  mkU8(63)),
+                                            mkU8(63))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x07: { /* AVER_U.df */
+            switch (df) {
+               case 0x00: { /* AVER_U.B */
+                     DIP("AVER_U.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Avg8Ux16, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* AVER_U.H */
+                     DIP("AVER_U.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Avg16Ux8, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* AVER_U.W */
+                     DIP("AVER_U.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Avg32Ux4, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* AVER_U.D */
+                     DIP("AVER_U.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_Add64x2,
+                                      binop(Iop_Add64x2,
+                                            binop(Iop_ShrN64x2,
+                                                  mkexpr(t1), mkU8(1)),
+                                            binop(Iop_ShrN64x2,
+                                                  mkexpr(t2), mkU8(1))),
+                                      binop(Iop_ShrN64x2,
+                                            binop(Iop_ShlN64x2,
+                                                  binop(Iop_OrV128,
+                                                        mkexpr(t1),
+                                                        mkexpr(t2)),
+                                                  mkU8(63)),
+                                            mkU8(63))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      default:
+         return -1;
+   }
+
+   return 0;
+}
+
+static Int msa_3R_11(UInt cins, UChar wd, UChar ws) { /* 3R (0x11)
+    * Emits IR for one MSA 3R-format instruction of the 0x11 group.
+    *   cins - raw instruction word; bits 25..23 select the operation,
+    *          bits 22..21 the data format df (0=B, 1=H, 2=W, 3=D, as in
+    *          the mnemonics printed below) and bits 20..16 the number of
+    *          the second source register wt.
+    *   wd   - destination vector register number.
+    *   ws   - first source vector register number.
+    * Returns 0 once the IR has been emitted, or -1 when the operation
+    * field is not one of the six values handled here. */
+   IRTemp t1, t2, t3;
+   UShort operation;
+   UChar df, wt;
+
+   operation = (cins & 0x03800000) >> 23;
+   df = (cins & 0x00600000) >> 21; /* 2-bit field: always 0..3, so the inner "default" arms are never taken */
+   wt = (cins & 0x001F0000) >> 16;
+
+   switch (operation) {
+      case 0x00: { /* SUBS_S.df: wd = Iop_QSub<width>S(ws, wt), one lane width per df */
+            switch (df) {
+               case 0x00: { /* SUBS_S.B */
+                     DIP("SUBS_S.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_QSub8Sx16, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* SUBS_S.H */
+                     DIP("SUBS_S.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_QSub16Sx8, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* SUBS_S.W */
+                     DIP("SUBS_S.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_QSub32Sx4, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* SUBS_S.D */
+                     DIP("SUBS_S.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_QSub64Sx2, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x01: { /* SUBS_U.df: wd = Iop_QSub<width>U(ws, wt), one lane width per df */
+            switch (df) {
+               case 0x00: { /* SUBS_U.B */
+                     DIP("SUBS_U.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_QSub8Ux16, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* SUBS_U.H */
+                     DIP("SUBS_U.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_QSub16Ux8, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* SUBS_U.W */
+                     DIP("SUBS_U.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_QSub32Ux4, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* SUBS_U.D */
+                     DIP("SUBS_U.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3, binop(Iop_QSub64Ux2, mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x02: { /* SUBSUS_U.df
+                    * Built from plain vector ops rather than a single Iop.
+                    * Per lane (the four df arms differ only in lane width):
+                    *   t1 = ws - wt                      (Iop_Sub*, wrapping)
+                    *   t2 = SarN(wt, width - 1)          (wt's top bit spread over the lane)
+                    *   t3 = CmpGT*U(ws, wt) | CmpEQ*(ws, wt)
+                    *   wd = (t3 & t2) | (t1 & (t3 ^ t2))
+                    * NOTE(review): the table below treats t3 as an all-ones /
+                    * all-zeros lane mask like t2 - confirm for the Cmp* ops.
+                    *   t2 = 0, t3 = 0  ->  0
+                    *   t2 = 0, t3 = 1  ->  t1
+                    *   t2 = 1, t3 = 0  ->  t1
+                    *   t2 = 1, t3 = 1  ->  all ones
+                    * Presumably this is "unsigned ws minus signed wt, clamped
+                    * to the unsigned lane range", as the mnemonic suggests. */
+            switch (df) {
+               case 0x00: { /* SUBSUS_U.B */
+                     DIP("SUBSUS_U.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, binop(Iop_Sub8x16, getWReg(ws), getWReg(wt)));
+                     assign(t2, binop(Iop_SarN8x16, getWReg(wt), mkU8(7)));
+                     assign(t3, binop(Iop_OrV128,
+                                      binop(Iop_CmpGT8Ux16,
+                                            getWReg(ws),
+                                            getWReg(wt)),
+                                      binop(Iop_CmpEQ8x16,
+                                            getWReg(ws),
+                                            getWReg(wt))));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_AndV128,
+                                         mkexpr(t3), mkexpr(t2)),
+                                   binop(Iop_AndV128,
+                                         mkexpr(t1),
+                                         binop(Iop_XorV128,
+                                               mkexpr(t3),
+                                               mkexpr(t2)))));
+                     break;
+                  }
+
+               case 0x01: { /* SUBSUS_U.H */
+                     DIP("SUBSUS_U.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, binop(Iop_Sub16x8, getWReg(ws), getWReg(wt)));
+                     assign(t2, binop(Iop_SarN16x8, getWReg(wt), mkU8(15)));
+                     assign(t3,
+                            binop(Iop_OrV128,
+                                  binop(Iop_CmpGT16Ux8,
+                                        getWReg(ws),
+                                        getWReg(wt)),
+                                  binop(Iop_CmpEQ16x8,
+                                        getWReg(ws),
+                                        getWReg(wt))));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_AndV128,
+                                         mkexpr(t3), mkexpr(t2)),
+                                   binop(Iop_AndV128,
+                                         mkexpr(t1),
+                                         binop(Iop_XorV128,
+                                               mkexpr(t3),
+                                               mkexpr(t2)))));
+                     break;
+                  }
+
+               case 0x02: { /* SUBSUS_U.W */
+                     DIP("SUBSUS_U.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, binop(Iop_Sub32x4, getWReg(ws), getWReg(wt)));
+                     assign(t2, binop(Iop_SarN32x4, getWReg(wt), mkU8(31)));
+                     assign(t3,
+                            binop(Iop_OrV128,
+                                  binop(Iop_CmpGT32Ux4,
+                                        getWReg(ws),
+                                        getWReg(wt)),
+                                  binop(Iop_CmpEQ32x4,
+                                        getWReg(ws),
+                                        getWReg(wt))));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_AndV128,
+                                         mkexpr(t3), mkexpr(t2)),
+                                   binop(Iop_AndV128,
+                                         mkexpr(t1),
+                                         binop(Iop_XorV128,
+                                               mkexpr(t3),
+                                               mkexpr(t2)))));
+                     break;
+                  }
+
+               case 0x03: { /* SUBSUS_U.D */
+                     DIP("SUBSUS_U.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, binop(Iop_Sub64x2, getWReg(ws), getWReg(wt)));
+                     assign(t2, binop(Iop_SarN64x2, getWReg(wt), mkU8(63)));
+                     assign(t3,
+                            binop(Iop_OrV128,
+                                  binop(Iop_CmpGT64Ux2,
+                                        getWReg(ws),
+                                        getWReg(wt)),
+                                  binop(Iop_CmpEQ64x2,
+                                        getWReg(ws),
+                                        getWReg(wt))));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_AndV128,
+                                         mkexpr(t3), mkexpr(t2)),
+                                   binop(Iop_AndV128,
+                                         mkexpr(t1),
+                                         binop(Iop_XorV128,
+                                               mkexpr(t3),
+                                               mkexpr(t2)))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x03: { /* SUBSUU_S.df
+                    * Per lane (the four df arms differ only in lane width):
+                    *   t1 = ws - wt                                  (wrapping)
+                    *   t2 = SarN((ws ^ wt) & (t1 ^ wt), width - 1)
+                    *        -> lane mask, set when the top bits of ws and wt
+                    *           differ and the top bit of t1 differs from wt's
+                    *   t3 = SarN(ws, width - 1) & t2
+                    *   wd = (t1 & ~t2) | (ShlN(t2, width - 1) ^ t3)
+                    * which selects:
+                    *   t2 = 0                    ->  t1
+                    *   t2 = 1, ws top bit clear  ->  only the top bit set
+                    *   t2 = 1, ws top bit set    ->  every bit except the top
+                    * Presumably these are the signed minimum / maximum used to
+                    * clamp an unsigned-minus-unsigned difference, as the
+                    * mnemonic suggests. */
+            switch (df) {
+               case 0x00: { /* SUBSUU_S.B */
+                     DIP("SUBSUU_S.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, binop(Iop_Sub8x16, getWReg(ws), getWReg(wt)));
+                     assign(t2,
+                            binop(Iop_SarN8x16,
+                                  binop (Iop_AndV128,
+                                         binop(Iop_XorV128,
+                                               getWReg(ws),
+                                               getWReg(wt)),
+                                         binop(Iop_XorV128,
+                                               mkexpr(t1),
+                                               getWReg(wt))),
+                                  mkU8(7)));
+                     assign(t3,
+                            binop(Iop_AndV128,
+                                  binop(Iop_SarN8x16,
+                                        getWReg(ws), mkU8(7)),
+                                  mkexpr(t2)));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_AndV128,
+                                         mkexpr(t1),
+                                         unop(Iop_NotV128,
+                                              mkexpr(t2))),
+                                   binop(Iop_XorV128,
+                                         binop(Iop_ShlN8x16,
+                                               mkexpr(t2), mkU8(7)),
+                                         mkexpr(t3))));
+                     break;
+                  }
+
+               case 0x01: { /* SUBSUU_S.H */
+                     DIP("SUBSUU_S.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, binop(Iop_Sub16x8, getWReg(ws), getWReg(wt)));
+                     assign(t2,
+                            binop(Iop_SarN16x8,
+                                  binop (Iop_AndV128,
+                                         binop(Iop_XorV128,
+                                               getWReg(ws),
+                                               getWReg(wt)),
+                                         binop(Iop_XorV128,
+                                               mkexpr(t1),
+                                               getWReg(wt))),
+                                  mkU8(15)));
+                     assign(t3,
+                            binop(Iop_AndV128,
+                                  binop(Iop_SarN16x8,
+                                        getWReg(ws),
+                                        mkU8(15)),
+                                  mkexpr(t2)));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_AndV128,
+                                         mkexpr(t1),
+                                         unop(Iop_NotV128,
+                                              mkexpr(t2))),
+                                   binop(Iop_XorV128,
+                                         binop(Iop_ShlN16x8,
+                                               mkexpr(t2), mkU8(15)),
+                                         mkexpr(t3))));
+                     break;
+                  }
+
+               case 0x02: { /* SUBSUU_S.W */
+                     DIP("SUBSUU_S.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, binop(Iop_Sub32x4, getWReg(ws), getWReg(wt)));
+                     assign(t2,
+                            binop(Iop_SarN32x4,
+                                  binop (Iop_AndV128,
+                                         binop(Iop_XorV128,
+                                               getWReg(ws),
+                                               getWReg(wt)),
+                                         binop(Iop_XorV128,
+                                               mkexpr(t1),
+                                               getWReg(wt))),
+                                  mkU8(31)));
+                     assign(t3,
+                            binop(Iop_AndV128,
+                                  binop(Iop_SarN32x4,
+                                        getWReg(ws),
+                                        mkU8(31)),
+                                  mkexpr(t2)));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_AndV128,
+                                         mkexpr(t1),
+                                         unop(Iop_NotV128,
+                                              mkexpr(t2))),
+                                   binop(Iop_XorV128,
+                                         binop(Iop_ShlN32x4,
+                                               mkexpr(t2),
+                                               mkU8(31)),
+                                         mkexpr(t3))));
+                     break;
+                  }
+
+               case 0x03: { /* SUBSUU_S.D */
+                     DIP("SUBSUU_S.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, binop(Iop_Sub64x2, getWReg(ws), getWReg(wt)));
+                     assign(t2,
+                            binop(Iop_SarN64x2,
+                                  binop (Iop_AndV128,
+                                         binop(Iop_XorV128,
+                                               getWReg(ws),
+                                               getWReg(wt)),
+                                         binop(Iop_XorV128,
+                                               mkexpr(t1),
+                                               getWReg(wt))),
+                                  mkU8(63)));
+                     assign(t3,
+                            binop(Iop_AndV128,
+                                  binop(Iop_SarN64x2,
+                                        getWReg(ws),
+                                        mkU8(63)),
+                                  mkexpr(t2)));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_AndV128,
+                                         mkexpr(t1),
+                                         unop(Iop_NotV128,
+                                              mkexpr(t2))),
+                                   binop(Iop_XorV128,
+                                         binop(Iop_ShlN64x2,
+                                               mkexpr(t2), mkU8(63)),
+                                         mkexpr(t3))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x04: { /* ASUB_S.df
+                    * Per lane (the four df arms differ only in lane width):
+                    *   t1 = SarN(ws, width - 1), t2 = SarN(wt, width - 1)
+                    *        -> top bit of each source spread over the lane
+                    *   t3 = ws - wt                              (wrapping)
+                    * wd is the OR of three terms, exactly one of which is
+                    * enabled for any lane:
+                    *   ~t1 &  t2    (ws top clear, wt top set)   ->  t3
+                    *   ~(t1 ^ t2)   (top bits equal)             ->  Abs(t3)
+                    *    t1 & ~t2    (ws top set, wt top clear)   ->  wt - ws */
+            switch (df) {
+               case 0x00: { /* ASUB_S.B */
+                     DIP("ASUB_S.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, binop(Iop_SarN8x16, getWReg(ws), mkU8(7)));
+                     assign(t2, binop(Iop_SarN8x16, getWReg(wt), mkU8(7)));
+                     assign(t3, binop(Iop_Sub8x16, getWReg(ws), getWReg(wt)));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_OrV128,
+                                         binop(Iop_AndV128,
+                                               binop(Iop_AndV128,
+                                                     unop(Iop_NotV128,
+                                                          mkexpr(t1)),
+                                                     mkexpr(t2)),
+                                               mkexpr(t3)),
+                                         binop(Iop_AndV128,
+                                               unop(Iop_NotV128,
+                                                    binop(Iop_XorV128,
+                                                          mkexpr(t1),
+                                                          mkexpr(t2))),
+                                               unop(Iop_Abs8x16,
+                                                    mkexpr(t3)))),
+                                   binop(Iop_AndV128,
+                                         binop(Iop_AndV128,
+                                               mkexpr(t1),
+                                               unop(Iop_NotV128,
+                                                    mkexpr(t2))),
+                                         binop(Iop_Sub8x16,
+                                               getWReg(wt),
+                                               getWReg(ws)))));
+                     break;
+                  }
+
+               case 0x01: { /* ASUB_S.H */
+                     DIP("ASUB_S.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, binop(Iop_SarN16x8, getWReg(ws), mkU8(15)));
+                     assign(t2, binop(Iop_SarN16x8, getWReg(wt), mkU8(15)));
+                     assign(t3, binop(Iop_Sub16x8, getWReg(ws), getWReg(wt)));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_OrV128,
+                                         binop(Iop_AndV128,
+                                               binop(Iop_AndV128,
+                                                     unop(Iop_NotV128,
+                                                          mkexpr(t1)),
+                                                     mkexpr(t2)),
+                                               mkexpr(t3)),
+                                         binop(Iop_AndV128,
+                                               unop(Iop_NotV128,
+                                                    binop(Iop_XorV128,
+                                                          mkexpr(t1),
+                                                          mkexpr(t2))),
+                                               unop(Iop_Abs16x8,
+                                                    mkexpr(t3)))),
+                                   binop(Iop_AndV128,
+                                         binop(Iop_AndV128,
+                                               mkexpr(t1),
+                                               unop(Iop_NotV128,
+                                                    mkexpr(t2))),
+                                         binop(Iop_Sub16x8,
+                                               getWReg(wt),
+                                               getWReg(ws)))));
+                     break;
+                  }
+
+               case 0x02: { /* ASUB_S.W */
+                     DIP("ASUB_S.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, binop(Iop_SarN32x4, getWReg(ws), mkU8(31)));
+                     assign(t2, binop(Iop_SarN32x4, getWReg(wt), mkU8(31)));
+                     assign(t3, binop(Iop_Sub32x4, getWReg(ws), getWReg(wt)));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_OrV128,
+                                         binop(Iop_AndV128,
+                                               binop(Iop_AndV128,
+                                                     unop(Iop_NotV128,
+                                                          mkexpr(t1)),
+                                                     mkexpr(t2)),
+                                               mkexpr(t3)),
+                                         binop(Iop_AndV128,
+                                               unop(Iop_NotV128,
+                                                    binop(Iop_XorV128,
+                                                          mkexpr(t1),
+                                                          mkexpr(t2))),
+                                               unop(Iop_Abs32x4,
+                                                    mkexpr(t3)))),
+                                   binop(Iop_AndV128,
+                                         binop(Iop_AndV128,
+                                               mkexpr(t1),
+                                               unop(Iop_NotV128,
+                                                    mkexpr(t2))),
+                                         binop(Iop_Sub32x4,
+                                               getWReg(wt),
+                                               getWReg(ws)))));
+                     break;
+                  }
+
+               case 0x03: { /* ASUB_S.D */
+                     DIP("ASUB_S.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, binop(Iop_SarN64x2, getWReg(ws), mkU8(63)));
+                     assign(t2, binop(Iop_SarN64x2, getWReg(wt), mkU8(63)));
+                     assign(t3, binop(Iop_Sub64x2, getWReg(ws), getWReg(wt)));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_OrV128,
+                                         binop(Iop_AndV128,
+                                               binop(Iop_AndV128,
+                                                     unop(Iop_NotV128,
+                                                          mkexpr(t1)),
+                                                     mkexpr(t2)),
+                                               mkexpr(t3)),
+                                         binop(Iop_AndV128,
+                                               unop(Iop_NotV128,
+                                                    binop(Iop_XorV128,
+                                                          mkexpr(t1),
+                                                          mkexpr(t2))),
+                                               unop(Iop_Abs64x2,
+                                                    mkexpr(t3)))),
+                                   binop(Iop_AndV128,
+                                         binop(Iop_AndV128,
+                                               mkexpr(t1),
+                                               unop(Iop_NotV128,
+                                                    mkexpr(t2))),
+                                         binop(Iop_Sub64x2,
+                                               getWReg(wt),
+                                               getWReg(ws)))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x05: { /* ASUB_U.df
+                    * Per lane (the four df arms differ only in lane width):
+                    *   t1 = ws, t2 = wt
+                    *   t3 = SarN(t1 ^ t2, width - 1)
+                    *        -> lane mask, set when the top bits differ
+                    *   wd = (~t3 & Abs(t1 - t2))
+                    *      | ( t3 & (MaxU(t1, t2) - MinU(t1, t2)))
+                    * NOTE(review): MaxU - MinU looks like it already yields
+                    * the unsigned absolute difference for every lane, so the
+                    * Abs/mask split may be redundant - confirm before
+                    * simplifying. */
+            switch (df) {
+               case 0x00: { /* ASUB_U.B */
+                     DIP("ASUB_U.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_SarN8x16,
+                                  binop(Iop_XorV128,
+                                        mkexpr(t1), mkexpr(t2)),
+                                  mkU8(7)));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_AndV128,
+                                         unop(Iop_NotV128, mkexpr(t3)),
+                                         unop(Iop_Abs8x16,
+                                              binop(Iop_Sub8x16,
+                                                    mkexpr(t1),
+                                                    mkexpr(t2)))),
+                                   binop(Iop_AndV128, mkexpr(t3),
+                                         binop(Iop_Sub8x16,
+                                               binop(Iop_Max8Ux16,
+                                                     mkexpr(t1),
+                                                     mkexpr(t2)),
+                                               binop(Iop_Min8Ux16,
+                                                     mkexpr(t1),
+                                                     mkexpr(t2))))));
+                     break;
+                  }
+
+               case 0x01: { /* ASUB_U.H */
+                     DIP("ASUB_U.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_SarN16x8,
+                                  binop(Iop_XorV128,
+                                        mkexpr(t1), mkexpr(t2)),
+                                  mkU8(15)));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_AndV128,
+                                         unop(Iop_NotV128,
+                                              mkexpr(t3)),
+                                         unop(Iop_Abs16x8,
+                                              binop(Iop_Sub16x8,
+                                                    mkexpr(t1),
+                                                    mkexpr(t2)))),
+                                   binop(Iop_AndV128,
+                                         mkexpr(t3),
+                                         binop(Iop_Sub16x8,
+                                               binop(Iop_Max16Ux8,
+                                                     mkexpr(t1),
+                                                     mkexpr(t2)),
+                                               binop(Iop_Min16Ux8,
+                                                     mkexpr(t1),
+                                                     mkexpr(t2))))));
+                     break;
+                  }
+
+               case 0x02: { /* ASUB_U.W */
+                     DIP("ASUB_U.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_SarN32x4,
+                                  binop(Iop_XorV128,
+                                        mkexpr(t1), mkexpr(t2)),
+                                  mkU8(31)));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_AndV128,
+                                         unop(Iop_NotV128, mkexpr(t3)),
+                                         unop(Iop_Abs32x4,
+                                              binop(Iop_Sub32x4,
+                                                    mkexpr(t1),
+                                                    mkexpr(t2)))),
+                                   binop(Iop_AndV128,
+                                         mkexpr(t3),
+                                         binop(Iop_Sub32x4,
+                                               binop(Iop_Max32Ux4,
+                                                     mkexpr(t1),
+                                                     mkexpr(t2)),
+                                               binop(Iop_Min32Ux4,
+                                                     mkexpr(t1),
+                                                     mkexpr(t2))))));
+                     break;
+                  }
+
+               case 0x03: { /* ASUB_U.D */
+                     DIP("ASUB_U.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_SarN64x2,
+                                  binop(Iop_XorV128,
+                                        mkexpr(t1), mkexpr(t2)),
+                                  mkU8(63)));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_AndV128,
+                                         unop(Iop_NotV128, mkexpr(t3)),
+                                         unop(Iop_Abs64x2,
+                                              binop(Iop_Sub64x2,
+                                                    mkexpr(t1),
+                                                    mkexpr(t2)))),
+                                   binop(Iop_AndV128,
+                                         mkexpr(t3),
+                                         binop(Iop_Sub64x2,
+                                               binop(Iop_Max64Ux2,
+                                                     mkexpr(t1),
+                                                     mkexpr(t2)),
+                                               binop(Iop_Min64Ux2,
+                                                     mkexpr(t1),
+                                                     mkexpr(t2))))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      default: /* operation values 6 and 7 have no case in this function */
+         return -1;
+   }
+
+   return 0;
+}
+
+static Int msa_3R_12(UInt cins, UChar wd, UChar ws) { /* 3R (0x12) */
+   IRTemp t1, t2, t3, t4, t5, t6;
+   UShort operation;
+   UChar df, wt;
+
+   operation = (cins & 0x03800000) >> 23;
+   df = (cins & 0x00600000) >> 21;
+   wt = (cins & 0x001F0000) >> 16;
+
+   switch (operation) {
+      case 0x00: { /* MULV.df */
+            switch (df) {
+               case 0x00: { /* MULV.B */
+                     DIP("MULV.B w%d, w%d, w%d", wd, ws, wt);
+                     putWReg(wd, binop(Iop_Mul8x16, getWReg(ws), getWReg(wt)));
+                     break;
+                  }
+
+               case 0x01: { /* MULV.H */
+                     DIP("MULV.H w%d, w%d, w%d", wd, ws, wt);
+                     putWReg(wd, binop(Iop_Mul16x8, getWReg(ws), getWReg(wt)));
+                     break;
+                  }
+
+               case 0x02: { /* MULV.W */
+                     DIP("MULV.W w%d, w%d, w%d", wd, ws, wt);
+                     putWReg(wd, binop(Iop_Mul32x4, getWReg(ws), getWReg(wt)));
+                     break;
+                  }
+
+               case 0x03: { /* MULV.D */
+                     DIP("MULV.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_Mul64,
+                                         unop(Iop_V128HIto64,
+                                              mkexpr(t1)),
+                                         unop(Iop_V128HIto64,
+                                              mkexpr(t2))),
+                                   binop(Iop_Mul64,
+                                         unop(Iop_V128to64,
+                                              mkexpr(t1)),
+                                         unop(Iop_V128to64,
+                                              mkexpr(t2)))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x01: { /* MADDV.df */
+            switch (df) {
+               case 0x00: { /* MADDV.B */
+                     DIP("MADDV.B w%d, w%d, w%d", wd, ws, wt);
+                     putWReg(wd,
+                             binop(Iop_Add8x16,
+                                   getWReg(wd),
+                                   binop(Iop_Mul8x16,
+                                         getWReg(ws),
+                                         getWReg(wt))));
+                     break;
+                  }
+
+               case 0x01: { /* MADDV.H */
+                     DIP("MADDV.H w%d, w%d, w%d", wd, ws, wt);
+                     putWReg(wd,
+                             binop(Iop_Add16x8,
+                                   getWReg(wd),
+                                   binop(Iop_Mul16x8,
+                                         getWReg(ws),
+                                         getWReg(wt))));
+                     break;
+                  }
+
+               case 0x02: { /* MADDV.W */
+                     DIP("MADDV.W w%d, w%d, w%d", wd, ws, wt);
+                     putWReg(wd,
+                             binop(Iop_Add32x4,
+                                   getWReg(wd),
+                                   binop(Iop_Mul32x4,
+                                         getWReg(ws),
+                                         getWReg(wt))));
+                     break;
+                  }
+
+               case 0x03: { /* MADDV.D */
+                     DIP("MADDV.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     putWReg(wd,
+                             binop(Iop_Add64x2,
+                                   getWReg(wd),
+                                   binop(Iop_64HLtoV128,
+                                         binop(Iop_Mul64,
+                                               unop(Iop_V128HIto64,
+                                                    mkexpr(t1)),
+                                               unop(Iop_V128HIto64,
+                                                    mkexpr(t2))),
+                                         binop(Iop_Mul64,
+                                               unop(Iop_V128to64,
+                                                    mkexpr(t1)),
+                                               unop(Iop_V128to64,
+                                                    mkexpr(t2))))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x02: { /* MSUBV.df */
+            switch (df) {
+               case 0x00: { /* MSUBV.B */
+                     DIP("MSUBV.B w%d, w%d, w%d", wd, ws, wt);
+                     putWReg(wd,
+                             binop(Iop_Sub8x16,
+                                   getWReg(wd),
+                                   binop(Iop_Mul8x16,
+                                         getWReg(ws),
+                                         getWReg(wt))));
+                     break;
+                  }
+
+               case 0x01: { /* MSUBV.H */
+                     DIP("MSUBV.H w%d, w%d, w%d", wd, ws, wt);
+                     putWReg(wd,
+                             binop(Iop_Sub16x8,
+                                   getWReg(wd),
+                                   binop(Iop_Mul16x8,
+                                         getWReg(ws),
+                                         getWReg(wt))));
+                     break;
+                  }
+
+               case 0x02: { /* MSUBV.W */
+                     DIP("MSUBV.W w%d, w%d, w%d", wd, ws, wt);
+                     putWReg(wd,
+                             binop(Iop_Sub32x4,
+                                   getWReg(wd),
+                                   binop(Iop_Mul32x4,
+                                         getWReg(ws),
+                                         getWReg(wt))));
+                     break;
+                  }
+
+               case 0x03: { /* MSUBV.D */
+                     DIP("MSUBV.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     putWReg(wd,
+                             binop(Iop_Sub64x2,
+                                   getWReg(wd),
+                                   binop(Iop_64HLtoV128,
+                                         binop(Iop_Mul64,
+                                               unop(Iop_V128HIto64,
+                                                    mkexpr(t1)),
+                                               unop(Iop_V128HIto64,
+                                                    mkexpr(t2))),
+                                         binop(Iop_Mul64,
+                                               unop(Iop_V128to64,
+                                                    mkexpr(t1)),
+                                               unop(Iop_V128to64,
+                                                    mkexpr(t2))))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x04: { /* DIV_S.df */
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            assign(t1, getWReg(ws));
+            assign(t2, getWReg(wt));
+
+            switch (df) {
+               case 0x00: { /* DIV_S.B */
+                     DIP("DIV_S.B w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[16];
+                     Int i;
+
+                     for (i = 0; i < 16; i++) {
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                           binop(Iop_Shl32,
+                                 binop(Iop_And32,
+                                       mkU32(0xFF),
+                                       binop(Iop_DivS32,
+                                             unop(Iop_8Sto32,
+                                                  binop(Iop_GetElem8x16,
+                                                        mkexpr(t1),
+                                                        mkU8(i))),
+                                             unop(Iop_8Sto32,
+                                                  binop(Iop_GetElem8x16,
+                                                        mkexpr(t2),
+                                                        mkU8(i))))),
+                                 mkU8((i & 3) << 3)));
+                     }
+
+                     putWReg(wd,
+                          binop(Iop_64HLtoV128,
+                             binop(Iop_32HLto64,
+                                   binop(Iop_Or32,
+                                         mkexpr(tmp[15]),
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[14]),
+                                               binop(Iop_Or32,
+                                                     mkexpr(tmp[13]),
+                                                     mkexpr(tmp[12])))),
+                                   binop(Iop_Or32,
+                                         mkexpr(tmp[11]),
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[10]),
+                                               binop(Iop_Or32,
+                                                     mkexpr(tmp[9]),
+                                                     mkexpr(tmp[8]))))),
+                             binop(Iop_32HLto64,
+                                   binop(Iop_Or32,
+                                         mkexpr(tmp[7]),
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[6]),
+                                               binop(Iop_Or32,
+                                                     mkexpr(tmp[5]),
+                                                     mkexpr(tmp[4])))),
+                                   binop(Iop_Or32,
+                                         mkexpr(tmp[3]),
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[2]),
+                                               binop(Iop_Or32,
+                                                     mkexpr(tmp[1]),
+                                                     mkexpr(tmp[0]))))))
+                     );
+                     break;
+                  }
+
+               case 0x01: { /* DIV_S.H */
+                     DIP("DIV_S.H w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[8];
+                     Int i;
+
+                     for (i = 0; i < 8; i++) {
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                            binop(Iop_Shl32,
+                                  binop(Iop_And32,
+                                     mkU32(0xFFFF),
+                                     binop(Iop_DivS32,
+                                           unop(Iop_16Sto32,
+                                                binop(Iop_GetElem16x8,
+                                                      mkexpr(t1),
+                                                      mkU8(i))),
+                                           unop(Iop_16Sto32,
+                                                binop(Iop_GetElem16x8,
+                                                      mkexpr(t2),
+                                                      mkU8(i))))),
+                                  mkU8((i & 1) << 4)));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_32HLto64,
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[7]),
+                                               mkexpr(tmp[6])),
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[5]),
+                                               mkexpr(tmp[4]))),
+                                   binop(Iop_32HLto64,
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[3]),
+                                               mkexpr(tmp[2])),
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[1]),
+                                               mkexpr(tmp[0])))));
+                     break;
+                  }
+
+               case 0x02: { /* DIV_S.W */
+                     DIP("DIV_S.W w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[4];
+                     Int i;
+
+                     for (i = 0; i < 4; i++) {
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                               binop(Iop_DivS32,
+                                     binop(Iop_GetElem32x4,
+                                           mkexpr(t1), mkU8(i)),
+                                     binop(Iop_GetElem32x4,
+                                           mkexpr(t2), mkU8(i))));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128, \
+                                   binop(Iop_32HLto64,
+                                         mkexpr(tmp[3]),
+                                         mkexpr(tmp[2])),
+                                   binop(Iop_32HLto64,
+                                         mkexpr(tmp[1]),
+                                         mkexpr(tmp[0]))));
+                     break;
+                  }
+
+               case 0x03: { /* DIV_S.D */
+                     DIP("DIV_S.D w%d, w%d, w%d", wd, ws, wt);
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_DivS64,
+                                         unop(Iop_V128HIto64,
+                                              mkexpr(t1)),
+                                         unop(Iop_V128HIto64,
+                                              mkexpr(t2))),
+                                   binop(Iop_DivS64,
+                                         unop(Iop_V128to64,
+                                              mkexpr(t1)),
+                                         unop(Iop_V128to64,
+                                              mkexpr(t2)))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x05: { /* DIV_U.df */
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            assign(t1, getWReg(ws));
+            assign(t2, getWReg(wt));
+
+            switch (df) {
+               case 0x00: { /* DIV_U.B */
+                     DIP("DIV_U.B w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[16];
+                     Int i;
+
+                     for (i = 0; i < 16; i++) {
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                            binop(Iop_Shl32,
+                               binop(Iop_And32,
+                                     mkU32(0xFF),
+                                     binop(Iop_DivU32,
+                                           unop(Iop_8Uto32,
+                                                binop(Iop_GetElem8x16,
+                                                      mkexpr(t1),
+                                                      mkU8(i))),
+                                           unop(Iop_8Uto32,
+                                                binop(Iop_GetElem8x16,
+                                                      mkexpr(t2),
+                                                      mkU8(i))))),
+                               mkU8((i & 3) << 3)));
+                     }
+
+                     putWReg(wd,
+                        binop(Iop_64HLtoV128,
+                             binop(Iop_32HLto64,
+                                   binop(Iop_Or32,
+                                         mkexpr(tmp[15]),
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[14]),
+                                               binop(Iop_Or32,
+                                                     mkexpr(tmp[13]),
+                                                     mkexpr(tmp[12])))),
+                                   binop(Iop_Or32,
+                                         mkexpr(tmp[11]),
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[10]),
+                                               binop(Iop_Or32,
+                                                     mkexpr(tmp[9]),
+                                                     mkexpr(tmp[8]))))),
+                             binop(Iop_32HLto64,
+                                   binop(Iop_Or32,
+                                         mkexpr(tmp[7]),
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[6]),
+                                               binop(Iop_Or32,
+                                                     mkexpr(tmp[5]),
+                                                     mkexpr(tmp[4])))),
+                                   binop(Iop_Or32,
+                                         mkexpr(tmp[3]),
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[2]),
+                                               binop(Iop_Or32,
+                                                     mkexpr(tmp[1]),
+                                                     mkexpr(tmp[0]))))))
+                     );
+                     break;
+                  }
+
+               case 0x01: { /* DIV_U.H */
+                     DIP("DIV_U.H w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[8];
+                     Int i;
+
+                     for (i = 0; i < 8; i++) {
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                           binop(Iop_Shl32,
+                               binop(Iop_And32,
+                                     mkU32(0xFFFF),
+                                     binop(Iop_DivU32,
+                                           unop(Iop_16Uto32,
+                                                binop(Iop_GetElem16x8,
+                                                      mkexpr(t1),
+                                                      mkU8(i))),
+                                           unop(Iop_16Uto32,
+                                                binop(Iop_GetElem16x8,
+                                                      mkexpr(t2),
+                                                      mkU8(i))))),
+                               mkU8((i & 1) << 4)));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_32HLto64,
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[7]),
+                                               mkexpr(tmp[6])),
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[5]),
+                                               mkexpr(tmp[4]))),
+                                   binop(Iop_32HLto64,
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[3]),
+                                               mkexpr(tmp[2])),
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[1]),
+                                               mkexpr(tmp[0])))));
+                     break;
+                  }
+
+               case 0x02: { /* DIV_U.W */
+                     DIP("DIV_U.W w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[4];
+                     Int i;
+
+                     for (i = 0; i < 4; i++) {
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                               binop(Iop_DivU32,
+                                     binop(Iop_GetElem32x4,
+                                           mkexpr(t1), mkU8(i)),
+                                     binop(Iop_GetElem32x4,
+                                           mkexpr(t2), mkU8(i))));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_32HLto64,
+                                         mkexpr(tmp[3]),
+                                         mkexpr(tmp[2])),
+                                   binop(Iop_32HLto64,
+                                         mkexpr(tmp[1]),
+                                         mkexpr(tmp[0]))));
+                     break;
+                  }
+
+               case 0x03: { /* DIV_U.D */
+                     DIP("DIV_U.D w%d, w%d, w%d", wd, ws, wt);
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_DivU64,
+                                         unop(Iop_V128HIto64,
+                                              mkexpr(t1)),
+                                         unop(Iop_V128HIto64,
+                                              mkexpr(t2))),
+                                   binop(Iop_DivU64,
+                                         unop(Iop_V128to64,
+                                              mkexpr(t1)),
+                                         unop(Iop_V128to64,
+                                              mkexpr(t2)))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x06: { /* MOD_S.df */
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            assign(t1, getWReg(ws));
+            assign(t2, getWReg(wt));
+
+            switch (df) {
+               case 0x00: { /* MOD_S.B */
+                     DIP("MOD_S.B w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[16];
+                     Int i;
+
+                     for (i = 0; i < 16; i++) {
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                           binop(Iop_Shl32,
+                              binop(Iop_And32,
+                                  mkU32(0xFF),
+                                  unop(Iop_64HIto32,
+                                       binop(Iop_DivModS32to32,
+                                             unop(Iop_8Sto32,
+                                                  binop(Iop_GetElem8x16,
+                                                        mkexpr(t1),
+                                                        mkU8(i))),
+                                             unop(Iop_8Sto32,
+                                                  binop(Iop_GetElem8x16,
+                                                        mkexpr(t2),
+                                                        mkU8(i)))))),
+                              mkU8((i & 3) << 3)));
+                     }
+
+                     putWReg(wd,
+                        binop(Iop_64HLtoV128,
+                           binop(Iop_32HLto64,
+                                binop(Iop_Or32,
+                                      mkexpr(tmp[15]),
+                                      binop(Iop_Or32,
+                                            mkexpr(tmp[14]),
+                                            binop(Iop_Or32,
+                                                  mkexpr(tmp[13]),
+                                                  mkexpr(tmp[12])))),
+                                binop(Iop_Or32,
+                                      mkexpr(tmp[11]),
+                                      binop(Iop_Or32,
+                                            mkexpr(tmp[10]),
+                                            binop(Iop_Or32,
+                                                  mkexpr(tmp[9]),
+                                                  mkexpr(tmp[8]))))),
+                           binop(Iop_32HLto64,
+                                binop(Iop_Or32,
+                                      mkexpr(tmp[7]),
+                                      binop(Iop_Or32,
+                                            mkexpr(tmp[6]),
+                                            binop(Iop_Or32,
+                                                  mkexpr(tmp[5]),
+                                                  mkexpr(tmp[4])))),
+                                binop(Iop_Or32,
+                                      mkexpr(tmp[3]),
+                                      binop(Iop_Or32,
+                                            mkexpr(tmp[2]),
+                                            binop(Iop_Or32,
+                                                  mkexpr(tmp[1]),
+                                                  mkexpr(tmp[0])))))));
+                     break;
+                  }
+
+               case 0x01: { /* MOD_S.H */
+                     DIP("MOD_S.H w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[8];
+                     Int i;
+
+                     for (i = 0; i < 8; i++) {
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                           binop(Iop_Shl32,
+                              binop(Iop_And32,
+                                  mkU32(0xFFFF),
+                                  unop(Iop_64HIto32,
+                                       binop(Iop_DivModS32to32,
+                                             unop(Iop_16Sto32,
+                                                  binop(Iop_GetElem16x8,
+                                                        mkexpr(t1),
+                                                        mkU8(i))),
+                                             unop(Iop_16Sto32,
+                                                  binop(Iop_GetElem16x8,
+                                                        mkexpr(t2),
+                                                        mkU8(i)))))),
+                              mkU8((i & 1) << 4)));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_32HLto64,
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[7]),
+                                               mkexpr(tmp[6])),
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[5]),
+                                               mkexpr(tmp[4]))),
+                                   binop(Iop_32HLto64,
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[3]),
+                                               mkexpr(tmp[2])),
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[1]),
+                                               mkexpr(tmp[0])))));
+                     break;
+                  }
+
+               case 0x02: { /* MOD_S.W */
+                     DIP("MOD_S.W w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[4];
+                     Int i;
+
+                     for (i = 0; i < 4; i++) {
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                               unop(Iop_64HIto32,
+                                    binop(Iop_DivModS32to32,
+                                            binop(Iop_GetElem32x4,
+                                                  mkexpr(t1),
+                                                  mkU8(i)),
+                                          binop(Iop_GetElem32x4,
+                                                mkexpr(t2),
+                                                mkU8(i)))));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_32HLto64,
+                                         mkexpr(tmp[3]),
+                                         mkexpr(tmp[2])),
+                                   binop(Iop_32HLto64,
+                                         mkexpr(tmp[1]),
+                                         mkexpr(tmp[0]))));
+                     break;
+                  }
+
+               case 0x03: { /* MOD_S.D */
+                     DIP("MOD_S.D w%d, w%d, w%d", wd, ws, wt);
+                     t3 = newTemp(Ity_I64);
+                     t4 = newTemp(Ity_I64);
+                     t5 = newTemp(Ity_I64);
+                     t6 = newTemp(Ity_I64);
+                     assign(t3, unop(Iop_V128HIto64, mkexpr(t1)));
+                     assign(t4, unop(Iop_V128HIto64, mkexpr(t2)));
+                     assign(t5, unop(Iop_V128to64, mkexpr(t1)));
+                     assign(t6, unop(Iop_V128to64, mkexpr(t2)));
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_Sub64,
+                                         mkexpr(t3),
+                                         binop(Iop_Mul64,
+                                               mkexpr(t4),
+                                               binop(Iop_DivS64,
+                                                     mkexpr(t3),
+                                                     mkexpr(t4)))),
+                                   binop(Iop_Sub64,
+                                         mkexpr(t5),
+                                         binop(Iop_Mul64,
+                                               mkexpr(t6),
+                                               binop(Iop_DivS64,
+                                                     mkexpr(t5),
+                                                     mkexpr(t6))))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x07: { /* MOD_U.df */
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            assign(t1, getWReg(ws));
+            assign(t2, getWReg(wt));
+
+            switch (df) {
+               case 0x00: { /* MOD_U.B */
+                     DIP("MOD_U.B w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[16];
+                     Int i;
+
+                     for (i = 0; i < 16; i++) {
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                           binop(Iop_Shl32,
+                              binop(Iop_And32,
+                                  mkU32(0xFF),
+                                  unop(Iop_64HIto32,
+                                       binop(Iop_DivModU32to32,
+                                             unop(Iop_8Uto32,
+                                                  binop(Iop_GetElem8x16,
+                                                        mkexpr(t1),
+                                                        mkU8(i))),
+                                             unop(Iop_8Uto32,
+                                                  binop(Iop_GetElem8x16,
+                                                        mkexpr(t2),
+                                                        mkU8(i)))))),
+                              mkU8((i & 3) << 3)));
+                     }
+
+                     putWReg(wd,
+                        binop(Iop_64HLtoV128,
+                          binop(Iop_32HLto64,
+                                binop(Iop_Or32,
+                                      mkexpr(tmp[15]),
+                                      binop(Iop_Or32,
+                                            mkexpr(tmp[14]),
+                                            binop(Iop_Or32,
+                                                  mkexpr(tmp[13]),
+                                                  mkexpr(tmp[12])))),
+                                binop(Iop_Or32,
+                                      mkexpr(tmp[11]),
+                                      binop(Iop_Or32,
+                                            mkexpr(tmp[10]),
+                                            binop(Iop_Or32,
+                                                  mkexpr(tmp[9]),
+                                                  mkexpr(tmp[8]))))),
+                          binop(Iop_32HLto64,
+                                binop(Iop_Or32,
+                                      mkexpr(tmp[7]),
+                                      binop(Iop_Or32,
+                                            mkexpr(tmp[6]),
+                                            binop(Iop_Or32,
+                                                  mkexpr(tmp[5]),
+                                                  mkexpr(tmp[4])))),
+                                binop(Iop_Or32,
+                                      mkexpr(tmp[3]),
+                                      binop(Iop_Or32,
+                                            mkexpr(tmp[2]),
+                                            binop(Iop_Or32,
+                                                  mkexpr(tmp[1]),
+                                                  mkexpr(tmp[0])))))));
+                     break;
+                  }
+
+               case 0x01: { /* MOD_U.H */
+                     DIP("MOD_U.H w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[8];
+                     Int i;
+
+                     for (i = 0; i < 8; i++) {
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                           binop(Iop_Shl32,
+                              binop(Iop_And32,
+                                  mkU32(0xFFFF),
+                                  unop(Iop_64HIto32,
+                                       binop(Iop_DivModU32to32,
+                                             unop(Iop_16Uto32,
+                                                  binop(Iop_GetElem16x8,
+                                                        mkexpr(t1),
+                                                        mkU8(i))),
+                                             unop(Iop_16Uto32,
+                                                  binop(Iop_GetElem16x8,
+                                                        mkexpr(t2),
+                                                        mkU8(i)))))),
+                              mkU8((i & 1) << 4)));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_32HLto64,
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[7]),
+                                               mkexpr(tmp[6])),
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[5]),
+                                               mkexpr(tmp[4]))),
+                                   binop(Iop_32HLto64,
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[3]),
+                                               mkexpr(tmp[2])),
+                                         binop(Iop_Or32,
+                                               mkexpr(tmp[1]),
+                                               mkexpr(tmp[0])))));
+                     break;
+                  }
+
+               case 0x02: { /* MOD_U.W */
+                     DIP("MOD_U.W w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[4];
+                     Int i;
+
+                     for (i = 0; i < 4; i++) {
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                               unop(Iop_64HIto32,
+                                    binop(Iop_DivModU32to32,
+                                          binop(Iop_GetElem32x4,
+                                                mkexpr(t1),
+                                                mkU8(i)),
+                                          binop(Iop_GetElem32x4,
+                                                mkexpr(t2),
+                                                mkU8(i)))));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_32HLto64,
+                                         mkexpr(tmp[3]),
+                                         mkexpr(tmp[2])),
+                                   binop(Iop_32HLto64,
+                                         mkexpr(tmp[1]),
+                                         mkexpr(tmp[0]))));
+                     break;
+                  }
+
+               case 0x03: { /* MOD_U.D */
+                     DIP("MOD_U.D w%d, w%d, w%d", wd, ws, wt);
+                     t3 = newTemp(Ity_I64);
+                     t4 = newTemp(Ity_I64);
+                     t5 = newTemp(Ity_I64);
+                     t6 = newTemp(Ity_I64);
+                     assign(t3, unop(Iop_V128HIto64, mkexpr(t1)));
+                     assign(t4, unop(Iop_V128HIto64, mkexpr(t2)));
+                     assign(t5, unop(Iop_V128to64, mkexpr(t1)));
+                     assign(t6, unop(Iop_V128to64, mkexpr(t2)));
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_Sub64,
+                                         mkexpr(t3),
+                                         binop(Iop_Mul64,
+                                               mkexpr(t4),
+                                               binop(Iop_DivU64,
+                                                     mkexpr(t3),
+                                                     mkexpr(t4)))),
+                                   binop(Iop_Sub64,
+                                         mkexpr(t5),
+                                         binop(Iop_Mul64,
+                                               mkexpr(t6),
+                                               binop(Iop_DivU64,
+                                                     mkexpr(t5),
+                                                     mkexpr(t6))))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      default:
+         return -1;
+   }
+
+   return 0;
+}
+
+static Int msa_3R_13(UInt cins, UChar wd, UChar ws) { /* 3R (0x13) */
+   IRTemp t1, t2;
+   UShort operation;
+   UChar df, wt;
+
+   operation = (cins & 0x03800000) >> 23;
+   df = (cins & 0x00600000) >> 21;
+   wt = (cins & 0x001F0000) >> 16;
+
+   switch (operation) {
+      case 0x00: { /* DOTP_S.df */
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            assign(t1, getWReg(ws));
+            assign(t2, getWReg(wt));
+
+            switch (df) {
+               case 0x01: { /* DOTP_S.H */
+                     DIP("DOTP_S.H w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[8];
+                     Int i;
+
+                     for (i = 0; i < 8; i++) {
+                        tmp[i] = newTemp(Ity_I16);
+                        assign(tmp[i],
+                               binop(Iop_Add16,
+                                     binop(Iop_MullS8,
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i)),
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i))),
+                                     binop(Iop_MullS8,
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i + 1)),
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i + 1)))));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_32HLto64,
+                                         binop(Iop_16HLto32,
+                                               mkexpr(tmp[7]),
+                                               mkexpr(tmp[6])),
+                                         binop(Iop_16HLto32,
+                                               mkexpr(tmp[5]),
+                                               mkexpr(tmp[4]))),
+                                   binop(Iop_32HLto64,
+                                         binop(Iop_16HLto32,
+                                               mkexpr(tmp[3]),
+                                               mkexpr(tmp[2])),
+                                         binop(Iop_16HLto32,
+                                               mkexpr(tmp[1]),
+                                               mkexpr(tmp[0])))));
+                     break;
+                  }
+
+               case 0x02: { /* DOTP_S.W */
+                     DIP("DOTP_S.W w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[4];
+                     Int i;
+
+                     for (i = 0; i < 4; i++) {
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                               binop(Iop_Add32,
+                                     binop(Iop_MullS16,
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i)),
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i))),
+                                     binop(Iop_MullS16,
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i + 1)),
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i + 1)))));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_32HLto64,
+                                         mkexpr(tmp[3]),
+                                         mkexpr(tmp[2])),
+                                   binop(Iop_32HLto64,
+                                         mkexpr(tmp[1]),
+                                         mkexpr(tmp[0]))));
+                     break;
+                  }
+
+               case 0x03: { /* DOTP_S.D */
+                     DIP("DOTP_S.D w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[2];
+                     Int i;
+
+                     for (i = 0; i < 2; i++) {
+                        tmp[i] = newTemp(Ity_I64);
+                        assign(tmp[i],
+                               binop(Iop_Add64,
+                                     binop(Iop_MullS32,
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i)),
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i))),
+                                     binop(Iop_MullS32,
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i + 1)),
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i + 1)))));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   mkexpr(tmp[1]), mkexpr(tmp[0])));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x01: { /* DOTP_U.df */
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            assign(t1, getWReg(ws));
+            assign(t2, getWReg(wt));
+
+            switch (df) {
+               case 0x01: { /* DOTP_U.H */
+                     DIP("DOTP_U.H w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[8];
+                     Int i;
+
+                     for (i = 0; i < 8; i++) {
+                        tmp[i] = newTemp(Ity_I16);
+                        assign(tmp[i],
+                               binop(Iop_Add16,
+                                     binop(Iop_MullU8,
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i)),
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i))),
+                                     binop(Iop_MullU8,
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i + 1)),
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i + 1)))));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_32HLto64,
+                                         binop(Iop_16HLto32,
+                                               mkexpr(tmp[7]),
+                                               mkexpr(tmp[6])),
+                                         binop(Iop_16HLto32,
+                                               mkexpr(tmp[5]),
+                                               mkexpr(tmp[4]))),
+                                   binop(Iop_32HLto64,
+                                         binop(Iop_16HLto32,
+                                               mkexpr(tmp[3]),
+                                               mkexpr(tmp[2])),
+                                         binop(Iop_16HLto32,
+                                               mkexpr(tmp[1]),
+                                               mkexpr(tmp[0])))));
+                     break;
+                  }
+
+               case 0x02: { /* DOTP_U.W */
+                     DIP("DOTP_U.W w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[4];
+                     Int i;
+
+                     for (i = 0; i < 4; i++) {
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                               binop(Iop_Add32,
+                                     binop(Iop_MullU16,
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i)),
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i))),
+                                     binop(Iop_MullU16,
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i + 1)),
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i + 1)))));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_32HLto64,
+                                         mkexpr(tmp[3]),
+                                         mkexpr(tmp[2])),
+                                   binop(Iop_32HLto64,
+                                         mkexpr(tmp[1]),
+                                         mkexpr(tmp[0]))));
+                     break;
+                  }
+
+               case 0x03: { /* DOTP_U.D */
+                     DIP("DOTP_U.D w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[2];
+                     Int i;
+
+                     for (i = 0; i < 2; i++) {
+                        tmp[i] = newTemp(Ity_I64);
+                        assign(tmp[i],
+                               binop(Iop_Add64,
+                                     binop(Iop_MullU32,
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i)),
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i))),
+                                     binop(Iop_MullU32,
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i + 1)),
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i + 1)))));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   mkexpr(tmp[1]), mkexpr(tmp[0])));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x02: { /* DPADD_S.df */
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            assign(t1, getWReg(ws));
+            assign(t2, getWReg(wt));
+
+            switch (df) {
+               case 0x01: { /* DPADD_S.H */
+                     DIP("DPADD_S.H w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[8];
+                     Int i;
+
+                     for (i = 0; i < 8; i++) {
+                        tmp[i] = newTemp(Ity_I16);
+                        assign(tmp[i],
+                               binop(Iop_Add16,
+                                     binop(Iop_MullS8,
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i)),
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i))),
+                                     binop(Iop_MullS8,
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i + 1)),
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i + 1)))));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_Add16x8,
+                                getWReg(wd),
+                                binop(Iop_64HLtoV128,
+                                      binop(Iop_32HLto64,
+                                            binop(Iop_16HLto32,
+                                                  mkexpr(tmp[7]),
+                                                  mkexpr(tmp[6])),
+                                            binop(Iop_16HLto32,
+                                                  mkexpr(tmp[5]),
+                                                  mkexpr(tmp[4]))),
+                                      binop(Iop_32HLto64,
+                                            binop(Iop_16HLto32,
+                                                  mkexpr(tmp[3]),
+                                                  mkexpr(tmp[2])),
+                                            binop(Iop_16HLto32,
+                                                  mkexpr(tmp[1]),
+                                                  mkexpr(tmp[0]))))));
+                     break;
+                  }
+
+               case 0x02: { /* DPADD_S.W */
+                     DIP("DPADD_S.W w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[4];
+                     Int i;
+
+                     for (i = 0; i < 4; i++) {
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                               binop(Iop_Add32,
+                                     binop(Iop_MullS16,
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i)),
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i))),
+                                     binop(Iop_MullS16,
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i + 1)),
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i + 1)))));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_Add32x4,
+                                   getWReg(wd),
+                                   binop(Iop_64HLtoV128,
+                                         binop(Iop_32HLto64,
+                                               mkexpr(tmp[3]),
+                                               mkexpr(tmp[2])),
+                                         binop(Iop_32HLto64,
+                                               mkexpr(tmp[1]),
+                                               mkexpr(tmp[0])))));
+                     break;
+                  }
+
+               case 0x03: { /* DPADD_S.D */
+                     DIP("DPADD_S.D w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[2];
+                     Int i;
+
+                     for (i = 0; i < 2; i++) {
+                        tmp[i] = newTemp(Ity_I64);
+                        assign(tmp[i],
+                               binop(Iop_Add64,
+                                     binop(Iop_MullS32,
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i)),
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i))),
+                                     binop(Iop_MullS32,
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i + 1)),
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i + 1)))));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_Add64x2,
+                                   getWReg(wd),
+                                   binop(Iop_64HLtoV128,
+                                         mkexpr(tmp[1]),
+                                         mkexpr(tmp[0]))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x03: { /* DPADD_U.df */
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            assign(t1, getWReg(ws));
+            assign(t2, getWReg(wt));
+
+            switch (df) {
+               case 0x01: { /* DPADD_U.H */
+                     DIP("DPADD_U.H w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[8];
+                     Int i;
+
+                     for (i = 0; i < 8; i++) {
+                        tmp[i] = newTemp(Ity_I16);
+                        assign(tmp[i],
+                               binop(Iop_Add16,
+                                     binop(Iop_MullU8,
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i)),
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i))),
+                                     binop(Iop_MullU8,
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i + 1)),
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i + 1)))));
+                     }
+
+                     putWReg(wd,
+                          binop(Iop_Add16x8,
+                                getWReg(wd),
+                                binop(Iop_64HLtoV128,
+                                      binop(Iop_32HLto64,
+                                            binop(Iop_16HLto32,
+                                                  mkexpr(tmp[7]),
+                                                  mkexpr(tmp[6])),
+                                            binop(Iop_16HLto32,
+                                                  mkexpr(tmp[5]),
+                                                  mkexpr(tmp[4]))),
+                                      binop(Iop_32HLto64,
+                                            binop(Iop_16HLto32,
+                                                  mkexpr(tmp[3]),
+                                                  mkexpr(tmp[2])),
+                                            binop(Iop_16HLto32,
+                                                  mkexpr(tmp[1]),
+                                                  mkexpr(tmp[0]))))));
+                     break;
+                  }
+
+               case 0x02: { /* DPADD_U.W */
+                     DIP("DPADD_U.W w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[4];
+                     Int i;
+
+                     for (i = 0; i < 4; i++) {
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                               binop(Iop_Add32,
+                                     binop(Iop_MullU16,
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i)),
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i))),
+                                     binop(Iop_MullU16,
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i + 1)),
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i + 1)))));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_Add32x4,
+                                   getWReg(wd),
+                                   binop(Iop_64HLtoV128,
+                                         binop(Iop_32HLto64,
+                                               mkexpr(tmp[3]),
+                                               mkexpr(tmp[2])),
+                                         binop(Iop_32HLto64,
+                                               mkexpr(tmp[1]),
+                                               mkexpr(tmp[0])))));
+                     break;
+                  }
+
+               case 0x03: { /* DPADD_U.D */
+                     DIP("DPADD_U.D w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[2];
+                     Int i;
+
+                     for (i = 0; i < 2; i++) {
+                        tmp[i] = newTemp(Ity_I64);
+                        assign(tmp[i],
+                               binop(Iop_Add64,
+                                     binop(Iop_MullU32,
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i)),
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i))),
+                                     binop(Iop_MullU32,
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i + 1)),
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i + 1)))));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_Add64x2,
+                                   getWReg(wd),
+                                   binop(Iop_64HLtoV128,
+                                         mkexpr(tmp[1]),
+                                         mkexpr(tmp[0]))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x04: { /* DPSUB_S.df: wd[i] = wd[i] - (ws[2i] x wt[2i] + ws[2i+1] x wt[2i+1]), products built with Iop_MullS8/16/32 */
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            assign(t1, getWReg(ws)); /* t1 = vector register ws */
+            assign(t2, getWReg(wt)); /* t2 = vector register wt */
+
+            switch (df) { /* df 0x01..0x03 are handled; any other value returns -1 */
+               case 0x01: { /* DPSUB_S.H: eight I16 sums, each from two pairs of 8-bit elements */
+                     DIP("DPSUB_S.H w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[8];
+                     Int i;
+
+                     for (i = 0; i < 8; i++) { /* tmp[i] = t1[2i] x t2[2i] + t1[2i+1] x t2[2i+1] */
+                        tmp[i] = newTemp(Ity_I16);
+                        assign(tmp[i],
+                               binop(Iop_Add16,
+                                     binop(Iop_MullS8,
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i)),
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i))),
+                                     binop(Iop_MullS8,
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i + 1)),
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i + 1)))));
+                     }
+
+                     putWReg(wd, /* wd = wd - packed sums; tmp[0] is the L operand at every HLto level */
+                          binop(Iop_Sub16x8,
+                                getWReg(wd),
+                                binop(Iop_64HLtoV128,
+                                      binop(Iop_32HLto64,
+                                            binop(Iop_16HLto32,
+                                                  mkexpr(tmp[7]),
+                                                  mkexpr(tmp[6])),
+                                            binop(Iop_16HLto32,
+                                                  mkexpr(tmp[5]),
+                                                  mkexpr(tmp[4]))),
+                                      binop(Iop_32HLto64,
+                                            binop(Iop_16HLto32,
+                                                  mkexpr(tmp[3]),
+                                                  mkexpr(tmp[2])),
+                                            binop(Iop_16HLto32,
+                                                  mkexpr(tmp[1]),
+                                                  mkexpr(tmp[0]))))));
+                     break;
+                  }
+
+               case 0x02: { /* DPSUB_S.W: four I32 sums, each from two pairs of 16-bit elements */
+                     DIP("DPSUB_S.W w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[4];
+                     Int i;
+
+                     for (i = 0; i < 4; i++) { /* tmp[i] = t1[2i] x t2[2i] + t1[2i+1] x t2[2i+1] */
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                               binop(Iop_Add32,
+                                     binop(Iop_MullS16,
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i)),
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i))),
+                                     binop(Iop_MullS16,
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i + 1)),
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i + 1)))));
+                     }
+
+                     putWReg(wd, /* wd = wd - packed sums (Iop_Sub32x4) */
+                             binop(Iop_Sub32x4,
+                                   getWReg(wd),
+                                   binop(Iop_64HLtoV128,
+                                         binop(Iop_32HLto64,
+                                               mkexpr(tmp[3]),
+                                               mkexpr(tmp[2])),
+                                         binop(Iop_32HLto64,
+                                               mkexpr(tmp[1]),
+                                               mkexpr(tmp[0])))));
+                     break;
+                  }
+
+               case 0x03: { /* DPSUB_S.D: two I64 sums, each from two pairs of 32-bit elements */
+                     DIP("DPSUB_S.D w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[2];
+                     Int i;
+
+                     for (i = 0; i < 2; i++) { /* tmp[i] = t1[2i] x t2[2i] + t1[2i+1] x t2[2i+1] */
+                        tmp[i] = newTemp(Ity_I64);
+                        assign(tmp[i],
+                               binop(Iop_Add64,
+                                     binop(Iop_MullS32,
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i)),
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i))),
+                                     binop(Iop_MullS32,
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i + 1)),
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i + 1)))));
+                     }
+
+                     putWReg(wd, /* wd = wd - packed sums (Iop_Sub64x2) */
+                             binop(Iop_Sub64x2,
+                                   getWReg(wd),
+                                   binop(Iop_64HLtoV128,
+                                         mkexpr(tmp[1]),
+                                         mkexpr(tmp[0]))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x05: { /* DPSUB_U.df: wd[i] = wd[i] - (ws[2i] x wt[2i] + ws[2i+1] x wt[2i+1]), products built with Iop_MullU8/16/32 */
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            assign(t1, getWReg(ws)); /* t1 = vector register ws */
+            assign(t2, getWReg(wt)); /* t2 = vector register wt */
+
+            switch (df) { /* df 0x01..0x03 are handled; any other value returns -1 */
+               case 0x01: { /* DPSUB_U.H: eight I16 sums, each from two pairs of 8-bit elements */
+                     DIP("DPSUB_U.H w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[8];
+                     Int i;
+
+                     for (i = 0; i < 8; i++) { /* tmp[i] = t1[2i] x t2[2i] + t1[2i+1] x t2[2i+1] */
+                        tmp[i] = newTemp(Ity_I16);
+                        assign(tmp[i],
+                               binop(Iop_Add16,
+                                     binop(Iop_MullU8,
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i)),
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i))),
+                                     binop(Iop_MullU8,
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i + 1)),
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i + 1)))));
+                     }
+
+                     putWReg(wd, /* wd = wd - packed sums; tmp[0] is the L operand at every HLto level */
+                          binop(Iop_Sub16x8,
+                                getWReg(wd),
+                                binop(Iop_64HLtoV128,
+                                      binop(Iop_32HLto64,
+                                            binop(Iop_16HLto32,
+                                                  mkexpr(tmp[7]),
+                                                  mkexpr(tmp[6])),
+                                            binop(Iop_16HLto32,
+                                                  mkexpr(tmp[5]),
+                                                  mkexpr(tmp[4]))),
+                                      binop(Iop_32HLto64,
+                                            binop(Iop_16HLto32,
+                                                  mkexpr(tmp[3]),
+                                                  mkexpr(tmp[2])),
+                                            binop(Iop_16HLto32,
+                                                  mkexpr(tmp[1]),
+                                                  mkexpr(tmp[0]))))));
+                     break;
+                  }
+
+               case 0x02: { /* DPSUB_U.W: four I32 sums, each from two pairs of 16-bit elements */
+                     DIP("DPSUB_U.W w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[4];
+                     Int i;
+
+                     for (i = 0; i < 4; i++) { /* tmp[i] = t1[2i] x t2[2i] + t1[2i+1] x t2[2i+1] */
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                               binop(Iop_Add32,
+                                     binop(Iop_MullU16,
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i)),
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i))),
+                                     binop(Iop_MullU16,
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i + 1)),
+                                           binop(Iop_GetElem16x8,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i + 1)))));
+                     }
+
+                     putWReg(wd, /* wd = wd - packed sums (Iop_Sub32x4) */
+                             binop(Iop_Sub32x4,
+                                   getWReg(wd),
+                                   binop(Iop_64HLtoV128,
+                                         binop(Iop_32HLto64,
+                                               mkexpr(tmp[3]),
+                                               mkexpr(tmp[2])),
+                                         binop(Iop_32HLto64,
+                                               mkexpr(tmp[1]),
+                                               mkexpr(tmp[0])))));
+                     break;
+                  }
+
+               case 0x03: { /* DPSUB_U.D: two I64 sums, each from two pairs of 32-bit elements */
+                     DIP("DPSUB_U.D w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[2];
+                     Int i;
+
+                     for (i = 0; i < 2; i++) { /* tmp[i] = t1[2i] x t2[2i] + t1[2i+1] x t2[2i+1] */
+                        tmp[i] = newTemp(Ity_I64);
+                        assign(tmp[i],
+                               binop(Iop_Add64,
+                                     binop(Iop_MullU32,
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i)),
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i))),
+                                     binop(Iop_MullU32,
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t1),
+                                                 mkU8(2 * i + 1)),
+                                           binop(Iop_GetElem32x4,
+                                                 mkexpr(t2),
+                                                 mkU8(2 * i + 1)))));
+                     }
+
+                     putWReg(wd, /* wd = wd - packed sums (Iop_Sub64x2) */
+                             binop(Iop_Sub64x2,
+                                   getWReg(wd),
+                                   binop(Iop_64HLtoV128,
+                                         mkexpr(tmp[1]),
+                                         mkexpr(tmp[0]))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      default:
+         return -1;
+   }
+
+   return 0;
+}
+
+static Int msa_3R_14(UInt cins, UChar wd, UChar ws) { /* 3R (0x14) */
+   IRTemp t1, t2, t3, t4;
+   IRType ty;
+   UShort operation;
+   UChar df, wt;
+
+   operation = (cins & 0x03800000) >> 23;
+   df = (cins & 0x00600000) >> 21;
+   wt = (cins & 0x001F0000) >> 16;
+   ty = mode64 ? Ity_I64 : Ity_I32;
+
+   switch (operation) {
+      case 0x00: { /* SLD.df: wd = (ws shifted right by t1) OR (wd shifted left by width - t1); t1 comes from getIReg(wt) */
+            switch (df) { /* df is a 2-bit field, so the four cases below are exhaustive */
+               case 0x00: { /* SLD.B: whole-vector shifts (Iop_ShrV128 / Iop_ShlV128) */
+                     DIP("SLD.B w%d, w%d[%d]", wd, ws, wt);
+                     t1 = newTemp(Ity_I32);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, /* t1 = (getIReg(wt) & 15) << 3 */
+                            binop(Iop_Shl32,
+                                  binop(Iop_And32,
+                                        mkNarrowTo32(ty,
+                                                     getIReg(wt)),
+                                        mkU32(15)),
+                                  mkU8(3)));
+                     assign(t2, /* t2 = ws >> t1 */
+                            binop(Iop_ShrV128,
+                                  getWReg(ws),
+                                  unop(Iop_32to8, mkexpr(t1))));
+                     assign(t3, /* t3 = wd << (128 - t1) */
+                            binop(Iop_ShlV128, /* NOTE(review): no ITE guard here, unlike SLD.H/W/D; t1 == 0 gives a shift amount of 128 - confirm this is handled */
+                                  getWReg(wd),
+                                  unop(Iop_32to8,
+                                       binop(Iop_Sub32,
+                                             mkU32(128),
+                                             mkexpr(t1)))));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   mkexpr(t2), mkexpr(t3)));
+                     break;
+                  }
+
+               case 0x01: {/* SLD.H: per-lane 64-bit shifts (Iop_Shr64x2 / Iop_Shl64x2) */
+                     DIP("SLD.H w%d, w%d[%d]", wd, ws, wt);
+                     t1 = newTemp(Ity_I32);
+                     t2 = newTemp(Ity_I64);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1, /* t1 = (getIReg(wt) & 7) << 3 */
+                            binop(Iop_Shl32,
+                                  binop(Iop_And32,
+                                        mkNarrowTo32(ty,
+                                                     getIReg(wt)),
+                                        mkU32(7)),
+                                  mkU8(3)));
+                     assign(t2, /* t2 = t1 in the low half, zero in the high half */
+                            binop(Iop_32HLto64, mkU32(0), mkexpr(t1)));
+                     assign(t3, /* t3 = ws shifted right by t2 in each 64-bit lane */
+                            binop(Iop_Shr64x2,
+                                  getWReg(ws),
+                                  binop(Iop_64HLtoV128,
+                                        mkexpr(t2), mkexpr(t2))));
+                     assign(t4, /* t4 = wd shifted left by (0x40 - t2) in each 64-bit lane */
+                            binop(Iop_Shl64x2,
+                                  getWReg(wd),
+                                  binop(Iop_Sub64x2,
+                                        binop(Iop_64HLtoV128,
+                                              mkU64(0x40ul),
+                                              mkU64(0x40ul)),
+                                        binop(Iop_64HLtoV128,
+                                              mkexpr(t2),
+                                              mkexpr(t2)))));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   mkexpr(t3),
+                                   IRExpr_ITE( /* when t1 == 0 use an all-zero vector instead of t4 (whose shift amount would be 0x40) */
+                                      binop(Iop_CmpNE32,
+                                            mkexpr(t1), mkU32(0)),
+                                      mkexpr(t4),
+                                      binop(Iop_64HLtoV128,
+                                            mkU64(0), mkU64(0)))));
+                     break;
+                  }
+
+               case 0x02: {/* SLD.W: per-lane 32-bit shifts (Iop_Shr32x4 / Iop_Shl32x4) */
+                     DIP("SLD.W w%d, w%d[%d]", wd, ws, wt);
+                     t1 = newTemp(Ity_I32);
+                     t2 = newTemp(Ity_I64);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1, /* t1 = (getIReg(wt) & 3) << 3 */
+                            binop(Iop_Shl32,
+                                  binop(Iop_And32,
+                                        mkNarrowTo32(ty,
+                                                     getIReg(wt)),
+                                        mkU32(3)),
+                                  mkU8(3)));
+                     assign(t2, /* t2 = t1 replicated in both 32-bit halves */
+                            binop(Iop_32HLto64,
+                                  mkexpr(t1), mkexpr(t1)));
+                     assign(t3, /* t3 = ws shifted right by t1 in each 32-bit lane */
+                            binop(Iop_Shr32x4,
+                                  getWReg(ws),
+                                  binop(Iop_64HLtoV128,
+                                        mkexpr(t2), mkexpr(t2))));
+                     assign(t4, /* t4 = wd shifted left by (0x20 - t1) in each 32-bit lane */
+                            binop(Iop_Shl32x4,
+                                  getWReg(wd),
+                                  binop(Iop_Sub32x4,
+                                        binop(Iop_64HLtoV128,
+                                              mkU64(0x2000000020ul),
+                                              mkU64(0x2000000020ul)),
+                                        binop(Iop_64HLtoV128,
+                                              mkexpr(t2),
+                                              mkexpr(t2)))));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   mkexpr(t3),
+                                   IRExpr_ITE( /* when t1 == 0 use an all-zero vector instead of t4 (whose shift amount would be 0x20) */
+                                      binop(Iop_CmpNE32,
+                                            mkexpr(t1), mkU32(0)),
+                                      mkexpr(t4),
+                                      binop(Iop_64HLtoV128,
+                                            mkU64(0), mkU64(0)))));
+                     break;
+                  }
+
+               case 0x03: { /* SLD.D: per-lane 16-bit shifts (Iop_Shr16x8 / Iop_Shl16x8) */
+                     DIP("SLD.D w%d, w%d[%d]", wd, ws, wt);
+                     t1 = newTemp(Ity_I32);
+                     t2 = newTemp(Ity_I64);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1, /* t1 = (getIReg(wt) & 1) << 3 */
+                            binop(Iop_Shl32,
+                                  binop(Iop_And32,
+                                        mkNarrowTo32(ty,
+                                                     getIReg(wt)),
+                                        mkU32(1)),
+                                  mkU8(3)));
+                     assign(t2, /* t2 = t1 replicated in all four 16-bit fields: (t1 | t1 << 16) in both halves */
+                            binop(Iop_32HLto64,
+                                  binop(Iop_Or32,
+                                        mkexpr(t1),
+                                        binop(Iop_Shl32,
+                                              mkexpr(t1), mkU8(16))),
+                                  binop(Iop_Or32,
+                                        mkexpr(t1),
+                                        binop(Iop_Shl32,
+                                              mkexpr(t1), mkU8(16)))));
+                     assign(t3, /* t3 = ws shifted right by t1 in each 16-bit lane */
+                            binop(Iop_Shr16x8,
+                                  getWReg(ws),
+                                  binop(Iop_64HLtoV128,
+                                        mkexpr(t2), mkexpr(t2))));
+                     assign(t4, /* t4 = wd shifted left by (0x10 - t1) in each 16-bit lane */
+                            binop(Iop_Shl16x8,
+                               getWReg(wd),
+                               binop(Iop_Sub16x8,
+                                     binop(Iop_64HLtoV128,
+                                           mkU64(0x10001000100010ul),
+                                           mkU64(0x10001000100010ul)),
+                                     binop(Iop_64HLtoV128,
+                                           mkexpr(t2),
+                                           mkexpr(t2)))));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   mkexpr(t3),
+                                   IRExpr_ITE( /* when t1 == 0 use an all-zero vector instead of t4 (whose shift amount would be 0x10) */
+                                      binop(Iop_CmpNE32,
+                                            mkexpr(t1), mkU32(0)),
+                                      mkexpr(t4),
+                                      binop(Iop_64HLtoV128,
+                                            mkU64(0), mkU64(0)))));
+                     break;
+                  }
+            }
+
+            break;
+         }
+
+      case 0x01: { /* SPLAT.df: every element of wd is set to the element of ws indexed by getIReg(wt) */
+            switch (df) { /* df is a 2-bit field, so the four cases below are exhaustive */
+                  Int i; /* loop index shared by all four cases */
+
+               case 0x00: { /* SPLAT.B: sixteen 8-bit elements */
+                     DIP("SPLAT.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_I32);
+                     assign(t1, getWReg(ws));
+                     assign(t2, /* t2 = getIReg(wt) narrowed to 32 bits */
+                            mkNarrowTo32(ty, getIReg(wt)));
+                     IRTemp tmp[16];
+
+                     for (i = 0; i < 16; i++) { /* every tmp[i] is the same element read; the index is t2 narrowed with Iop_32to8 */
+                        tmp[i] = newTemp(Ity_I8);
+                        assign(tmp[i],
+                               binop(Iop_GetElem8x16,
+                                     mkexpr(t1),
+                                     unop(Iop_32to8, mkexpr(t2))));
+                     }
+
+                     putWReg(wd, /* concatenate the sixteen copies back into a V128 */
+                          binop(Iop_64HLtoV128,
+                                binop(Iop_32HLto64,
+                                      binop(Iop_16HLto32,
+                                            binop(Iop_8HLto16,
+                                                  mkexpr(tmp[15]),
+                                                  mkexpr(tmp[14])),
+                                            binop(Iop_8HLto16,
+                                                  mkexpr(tmp[13]),
+                                                  mkexpr(tmp[12]))),
+                                      binop(Iop_16HLto32,
+                                            binop(Iop_8HLto16,
+                                                  mkexpr(tmp[11]),
+                                                  mkexpr(tmp[10])),
+                                            binop(Iop_8HLto16,
+                                                  mkexpr(tmp[9]),
+                                                  mkexpr(tmp[8])))),
+                                binop(Iop_32HLto64,
+                                      binop(Iop_16HLto32,
+                                            binop(Iop_8HLto16,
+                                                  mkexpr(tmp[7]),
+                                                  mkexpr(tmp[6])),
+                                            binop(Iop_8HLto16,
+                                                  mkexpr(tmp[5]),
+                                                  mkexpr(tmp[4]))),
+                                      binop(Iop_16HLto32,
+                                            binop(Iop_8HLto16,
+                                                  mkexpr(tmp[3]),
+                                                  mkexpr(tmp[2])),
+                                            binop(Iop_8HLto16,
+                                                  mkexpr(tmp[1]),
+                                                  mkexpr(tmp[0]))))));
+                     break;
+                  }
+
+               case 0x01: { /* SPLAT.H: eight 16-bit elements */
+                     DIP("SPLAT.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_I32);
+                     assign(t1, getWReg(ws));
+                     assign(t2, /* t2 = getIReg(wt) narrowed to 32 bits */
+                            mkNarrowTo32(ty, getIReg(wt)));
+                     IRTemp tmp[8];
+
+                     for (i = 0; i < 8; i++) { /* every tmp[i] is the same element read; the index is t2 narrowed with Iop_32to8 */
+                        tmp[i] = newTemp(Ity_I16);
+                        assign(tmp[i],
+                               binop(Iop_GetElem16x8,
+                                     mkexpr(t1),
+                                     unop(Iop_32to8, mkexpr(t2))));
+                     }
+
+                     putWReg(wd, /* concatenate the eight copies back into a V128 */
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_32HLto64,
+                                         binop(Iop_16HLto32,
+                                               mkexpr(tmp[7]),
+                                               mkexpr(tmp[6])),
+                                         binop(Iop_16HLto32,
+                                               mkexpr(tmp[5]),
+                                               mkexpr(tmp[4]))),
+                                   binop(Iop_32HLto64,
+                                         binop(Iop_16HLto32,
+                                               mkexpr(tmp[3]),
+                                               mkexpr(tmp[2])),
+                                         binop(Iop_16HLto32,
+                                               mkexpr(tmp[1]),
+                                               mkexpr(tmp[0])))));
+                     break;
+                  }
+
+               case 0x02: { /* SPLAT.W: four 32-bit elements */
+                     DIP("SPLAT.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_I32);
+                     assign(t1, getWReg(ws));
+                     assign(t2, /* t2 = getIReg(wt) narrowed to 32 bits */
+                            mkNarrowTo32(ty, getIReg(wt)));
+                     IRTemp tmp[4];
+
+                     for (i = 0; i < 4; i++) { /* every tmp[i] is the same element read; the index is t2 narrowed with Iop_32to8 */
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                               binop(Iop_GetElem32x4,
+                                     mkexpr(t1),
+                                     unop(Iop_32to8, mkexpr(t2))));
+                     }
+
+                     putWReg(wd, /* concatenate the four copies back into a V128 */
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_32HLto64,
+                                         mkexpr(tmp[3]),
+                                         mkexpr(tmp[2])),
+                                   binop(Iop_32HLto64,
+                                         mkexpr(tmp[1]),
+                                         mkexpr(tmp[0]))));
+                     break;
+                  }
+
+               case 0x03: { /* SPLAT.D: two 64-bit elements */
+                     DIP("SPLAT.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_I32);
+                     assign(t1, getWReg(ws));
+                     assign(t2, /* t2 = getIReg(wt) narrowed to 32 bits */
+                            mkNarrowTo32(ty, getIReg(wt)));
+                     IRTemp tmp[2];
+
+                     for (i = 0; i < 2; i++) { /* every tmp[i] is the same element read; the index is t2 narrowed with Iop_32to8 */
+                        tmp[i] = newTemp(Ity_I64);
+                        assign(tmp[i],
+                               binop(Iop_GetElem64x2,
+                                     mkexpr(t1),
+                                     unop(Iop_32to8, mkexpr(t2))));
+                     }
+
+                     putWReg(wd, /* concatenate the two copies back into a V128 */
+                             binop(Iop_64HLtoV128,
+                                   mkexpr(tmp[1]), mkexpr(tmp[0])));
+                     break;
+                  }
+            }
+
+            break;
+         }
+
+      case 0x02: { /* PCKEV.df: wd = a single two-operand vector op applied to (ws, wt), chosen by df */
+            switch (df) { /* the default arm returns -1 for any df not listed */
+               case 0x00: { /* PCKEV.B: Iop_PackEvenLanes8x16(ws, wt) */
+                     DIP("PCKEV.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_PackEvenLanes8x16,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* PCKEV.H: Iop_PackEvenLanes16x8(ws, wt) */
+                     DIP("PCKEV.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_PackEvenLanes16x8,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* PCKEV.W: Iop_PackEvenLanes32x4(ws, wt) */
+                     DIP("PCKEV.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_PackEvenLanes32x4,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* PCKEV.D: uses Iop_InterleaveLO64x2(ws, wt) rather than a PackEvenLanes op */
+                     DIP("PCKEV.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_InterleaveLO64x2,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x03: { /* PCKOD.df: wd = a single two-operand vector op applied to (ws, wt), chosen by df */
+            switch (df) { /* the default arm returns -1 for any df not listed */
+               case 0x00: { /* PCKOD.B: Iop_PackOddLanes8x16(ws, wt) */
+                     DIP("PCKOD.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_PackOddLanes8x16,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* PCKOD.H: Iop_PackOddLanes16x8(ws, wt) */
+                     DIP("PCKOD.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_PackOddLanes16x8,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* PCKOD.W: Iop_PackOddLanes32x4(ws, wt) */
+                     DIP("PCKOD.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_PackOddLanes32x4,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* PCKOD.D: uses Iop_InterleaveHI64x2(ws, wt) rather than a PackOddLanes op */
+                     DIP("PCKOD.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_InterleaveHI64x2,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x04: { /* ILVL.df */
+            switch (df) {
+               case 0x00: { /* ILVL.B */
+                     DIP("ILVL.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_InterleaveHI8x16,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* ILVL.H */
+                     DIP("ILVL.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_InterleaveHI16x8,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* ILVL.W */
+                     DIP("ILVL.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_InterleaveHI32x4,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* ILVL.D */
+                     DIP("ILVL.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_InterleaveHI64x2,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x05: { /* ILVR.df */
+            switch (df) {
+               case 0x00: { /* ILVR.B */
+                     DIP("ILVL.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_InterleaveLO8x16,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* ILVR.H */
+                     DIP("ILVL.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_InterleaveLO16x8,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* ILVR.W */
+                     DIP("ILVL.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_InterleaveLO32x4,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* ILVR.D */
+                     DIP("ILVL.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_InterleaveLO64x2,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+            }
+
+            break;
+         }
+
+      case 0x06: { /* ILVEV.df */
+            switch (df) {
+               case 0x00: { /* ILVEV.B */
+                     DIP("ILVEV.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_InterleaveEvenLanes8x16,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* ILVEV.H */
+                     DIP("ILVEV.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_InterleaveEvenLanes16x8,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* ILVEV.W */
+                     DIP("ILVEV.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_InterleaveEvenLanes32x4,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* ILVEV.D */
+                     DIP("ILVEV.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_InterleaveLO64x2,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x07: { /* ILVOD.df */
+            switch (df) {
+               case 0x00: { /* ILVOD.B */
+                     DIP("ILVOD.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_InterleaveOddLanes8x16,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x01: { /* ILVOD.H */
+                     DIP("ILVOD.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_InterleaveOddLanes16x8,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* ILVOD.W */
+                     DIP("ILVOD.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_InterleaveOddLanes32x4,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* ILVOD.D */
+                     DIP("ILVOD.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_InterleaveHI64x2,
+                                  mkexpr(t1), mkexpr(t2)));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      default:
+         return -1;
+   }
+
+   return 0;
+}
+
+static Int msa_3R_15(UInt cins, UChar wd, UChar ws) { /* 3R (0x15) */
+   IRTemp t1, t2, t3, t4;
+   UShort operation;
+   UChar df, wt;
+
+   operation = (cins & 0x03800000) >> 23;
+   df = (cins & 0x00600000) >> 21;
+   wt = (cins & 0x001F0000) >> 16;
+
+   switch (operation) {
+      case 0x00: { /* VSHF.df */
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            t3 = newTemp(Ity_V128);
+            assign(t1, getWReg(wd));
+            assign(t2, getWReg(ws));
+            assign(t3, getWReg(wt));
+
+            switch (df) {
+               case 0x00: { /* VSHF.B */
+                     DIP("VSHF.B w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[16];
+                     Int i;
+
+                     for (i = 0; i < 16; i++) {
+                        tmp[i] = newTemp(Ity_I8);
+                        assign(tmp[i],
+                               IRExpr_ITE(
+                                  binop(Iop_CmpEQ8,
+                                        binop(Iop_And8,
+                                              binop(Iop_GetElem8x16,
+                                                    mkexpr(t1),
+                                                    mkU8(i)),
+                                              mkU8(0xC0)),
+                                        mkU8(0x0)),
+                                  IRExpr_ITE(
+                                     binop(Iop_CmpEQ8,
+                                           binop(Iop_And8,
+                                                 binop(Iop_GetElem8x16,
+                                                       mkexpr(t1),
+                                                       mkU8(i)),
+                                                 mkU8(0x10)),
+                                           mkU8(0x0)),
+                                     binop(Iop_GetElem8x16,
+                                           mkexpr(t3),
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t1),
+                                                 mkU8(i))),
+                                     binop(Iop_GetElem8x16,
+                                           mkexpr(t2),
+                                           binop(Iop_GetElem8x16,
+                                                 mkexpr(t1),
+                                                 mkU8(i)))),
+                                  mkU8(0x0)));
+                     }
+
+                     putWReg(wd,
+                          binop(Iop_64HLtoV128,
+                                binop(Iop_32HLto64,
+                                      binop(Iop_16HLto32,
+                                            binop(Iop_8HLto16,
+                                                  mkexpr(tmp[15]),
+                                                  mkexpr(tmp[14])),
+                                            binop(Iop_8HLto16,
+                                                  mkexpr(tmp[13]),
+                                                  mkexpr(tmp[12]))),
+                                      binop(Iop_16HLto32,
+                                            binop(Iop_8HLto16,
+                                                  mkexpr(tmp[11]),
+                                                  mkexpr(tmp[10])),
+                                            binop(Iop_8HLto16,
+                                                  mkexpr(tmp[9]),
+                                                  mkexpr(tmp[8])))),
+                                binop(Iop_32HLto64,
+                                      binop(Iop_16HLto32,
+                                            binop(Iop_8HLto16,
+                                                  mkexpr(tmp[7]),
+                                                  mkexpr(tmp[6])),
+                                            binop(Iop_8HLto16,
+                                                  mkexpr(tmp[5]),
+                                                  mkexpr(tmp[4]))),
+                                      binop(Iop_16HLto32,
+                                            binop(Iop_8HLto16,
+                                                  mkexpr(tmp[3]),
+                                                  mkexpr(tmp[2])),
+                                            binop(Iop_8HLto16,
+                                                  mkexpr(tmp[1]),
+                                                  mkexpr(tmp[0]))))));
+                     break;
+                  }
+
+               case 0x01: { /* VSHF.H */
+                     DIP("VSHF.H w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[8];
+                     Int i;
+
+                     for (i = 0; i < 8; i++) {
+                        tmp[i] = newTemp(Ity_I16);
+                        assign(tmp[i],
+                               IRExpr_ITE(
+                                  binop(Iop_CmpEQ16,
+                                        binop(Iop_And16,
+                                              binop(Iop_GetElem16x8,
+                                                    mkexpr(t1),
+                                                    mkU8(i)),
+                                              mkU16(0xC0)),
+                                        mkU16(0x0)),
+                                  IRExpr_ITE(
+                                     binop(Iop_CmpEQ16,
+                                           binop(Iop_And16,
+                                                 binop(Iop_GetElem16x8,
+                                                       mkexpr(t1),
+                                                       mkU8(i)),
+                                                 mkU16(0x08)),
+                                           mkU16(0x0)),
+                                     binop(Iop_GetElem16x8,
+                                           mkexpr(t3),
+                                           unop(Iop_16to8,
+                                                binop(Iop_GetElem16x8,
+                                                      mkexpr(t1),
+                                                      mkU8(i)))),
+                                     binop(Iop_GetElem16x8,
+                                           mkexpr(t2),
+                                           unop(Iop_16to8,
+                                                binop(Iop_GetElem16x8,
+                                                      mkexpr(t1),
+                                                      mkU8(i))))),
+                                  mkU16(0x0)));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_32HLto64,
+                                         binop(Iop_16HLto32,
+                                               mkexpr(tmp[7]),
+                                               mkexpr(tmp[6])),
+                                         binop(Iop_16HLto32,
+                                               mkexpr(tmp[5]),
+                                               mkexpr(tmp[4]))),
+                                   binop(Iop_32HLto64,
+                                         binop(Iop_16HLto32,
+                                               mkexpr(tmp[3]),
+                                               mkexpr(tmp[2])),
+                                         binop(Iop_16HLto32,
+                                               mkexpr(tmp[1]),
+                                               mkexpr(tmp[0])))));
+                     break;
+                  }
+
+               case 0x02: { /* VSHF.W */
+                     DIP("VSHF.W w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[4];
+                     Int i;
+
+                     for (i = 0; i < 4; i++) {
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                               IRExpr_ITE(
+                                  binop(Iop_CmpEQ32,
+                                        binop(Iop_And32,
+                                              binop(Iop_GetElem32x4,
+                                                    mkexpr(t1),
+                                                    mkU8(i)),
+                                              mkU32(0xC0)),
+                                        mkU32(0x0)),
+                                  IRExpr_ITE(
+                                     binop(Iop_CmpEQ32,
+                                           binop(Iop_And32,
+                                                 binop(Iop_GetElem32x4,
+                                                       mkexpr(t1),
+                                                       mkU8(i)),
+                                                 mkU32(0x04)),
+                                           mkU32(0x0)),
+                                     binop(Iop_GetElem32x4,
+                                           mkexpr(t3),
+                                           unop(Iop_32to8,
+                                                binop(Iop_GetElem32x4,
+                                                      mkexpr(t1),
+                                                      mkU8(i)))),
+                                     binop(Iop_GetElem32x4,
+                                           mkexpr(t2),
+                                           unop(Iop_32to8,
+                                                binop(Iop_GetElem32x4,
+                                                      mkexpr(t1),
+                                                      mkU8(i))))),
+                                  mkU32(0x0)));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_32HLto64,
+                                         mkexpr(tmp[3]),
+                                         mkexpr(tmp[2])),
+                                   binop(Iop_32HLto64,
+                                         mkexpr(tmp[1]),
+                                         mkexpr(tmp[0]))));
+                     break;
+                  }
+
+               case 0x03: { /* VSHF.D */
+                     DIP("VSHF.D w%d, w%d, w%d", wd, ws, wt);
+                     IRTemp tmp[2];
+                     Int i;
+
+                     for (i = 0; i < 2; i++) {
+                        tmp[i] = newTemp(Ity_I64);
+                        assign(tmp[i],
+                               IRExpr_ITE(
+                                  binop(Iop_CmpEQ64,
+                                        binop(Iop_And64,
+                                              binop(Iop_GetElem64x2,
+                                                    mkexpr(t1),
+                                                    mkU8(i)),
+                                              mkU64(0xC0)),
+                                        mkU64(0x0)),
+                                  IRExpr_ITE(
+                                     binop(Iop_CmpEQ64,
+                                           binop(Iop_And64,
+                                                 binop(Iop_GetElem64x2,
+                                                       mkexpr(t1),
+                                                       mkU8(i)),
+                                                 mkU64(0x02)),
+                                           mkU64(0x0)),
+                                     binop(Iop_GetElem64x2,
+                                           mkexpr(t3),
+                                           unop(Iop_64to8,
+                                                binop(Iop_GetElem64x2,
+                                                      mkexpr(t1),
+                                                      mkU8(i)))),
+                                     binop(Iop_GetElem64x2,
+                                           mkexpr(t2),
+                                           unop(Iop_64to8,
+                                                binop(Iop_GetElem64x2,
+                                                      mkexpr(t1),
+                                                      mkU8(i))))),
+                                  mkU64(0x0)));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   mkexpr(tmp[1]), mkexpr(tmp[0])));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x01: { /* SRAR.df */
+            switch (df) {
+               case 0x00: { /* SRAR.B */
+                     DIP("SRAR.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1,
+                            binop(Iop_Sar8x16,
+                                  getWReg(ws),
+                                  getWReg(wt)));
+                     assign(t2,
+                            binop(Iop_Sub8x16,
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x808080808080808ull),
+                                        mkU64(0x808080808080808ull)),
+                                  getWReg(wt)));
+                     assign(t4,
+                            unop(Iop_NotV128,
+                                 binop(Iop_CmpEQ8x16,
+                                       binop(Iop_ShlN8x16,
+                                             getWReg(wt),
+                                             mkU8(5)),
+                                       binop(Iop_64HLtoV128,
+                                             mkU64(0), mkU64(0)))));
+                     assign(t3,
+                            binop(Iop_ShrN8x16,
+                                  binop(Iop_AndV128,
+                                        binop(Iop_Shl8x16,
+                                              getWReg(ws),
+                                              mkexpr(t2)),
+                                        mkexpr(t4)),
+                                  mkU8(7)));
+                     putWReg(wd,
+                             binop(Iop_Add8x16,
+                                   mkexpr(t1), mkexpr(t3)));
+                     break;
+                  }
+
+               case 0x01: { /* SRAR.H */
+                     DIP("SRAR.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1,
+                            binop(Iop_Sar16x8,
+                                  getWReg(ws),
+                                  getWReg(wt)));
+                     assign(t2,
+                            binop(Iop_Sub16x8,
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x10001000100010ul),
+                                        mkU64(0x10001000100010ul)),
+                                  getWReg(wt)));
+                     assign(t4,
+                            unop(Iop_NotV128,
+                                 binop(Iop_CmpEQ16x8,
+                                       binop(Iop_ShlN16x8,
+                                             getWReg(wt),
+                                             mkU8(12)),
+                                       binop(Iop_64HLtoV128,
+                                             mkU64(0), mkU64(0)))));
+                     assign(t3,
+                            binop(Iop_ShrN16x8,
+                                  binop(Iop_AndV128,
+                                        binop(Iop_Shl16x8,
+                                              getWReg(ws),
+                                              mkexpr(t2)),
+                                        mkexpr(t4)),
+                                  mkU8(15)));
+                     putWReg(wd,
+                             binop(Iop_Add16x8,
+                                   mkexpr(t1), mkexpr(t3)));
+                     break;
+                  }
+
+               case 0x02: { /* SRAR.W */
+                     DIP("SRAR.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128); // ws >> wt (arithmetic)
+                     t2 = newTemp(Ity_V128); // 32 - wt
+                     t3 = newTemp(Ity_V128); // rounding bit (0 or 1 per lane)
+                     t4 = newTemp(Ity_V128); // mask: (wt % 32) != 0
+                     assign(t1,
+                            binop(Iop_Sar32x4,
+                                  getWReg(ws),
+                                  getWReg(wt)));
+                     assign(t2,
+                            binop(Iop_Sub32x4,
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x2000000020ul),
+                                        mkU64(0x2000000020ul)),
+                                  getWReg(wt)));
+                     assign(t4,
+                            unop(Iop_NotV128,
+                                 binop(Iop_CmpEQ32x4,
+                                       binop(Iop_ShlN32x4,
+                                             getWReg(wt),
+                                             mkU8(27)),
+                                       binop(Iop_64HLtoV128,
+                                             mkU64(0), mkU64(0)))));
+                     assign(t3,
+                            binop(Iop_ShrN32x4,
+                                  binop(Iop_AndV128,
+                                        binop(Iop_Shl32x4,
+                                              getWReg(ws),
+                                              mkexpr(t2)),
+                                        mkexpr(t4)),
+                                  mkU8(31)));
+                     putWReg(wd,
+                             binop(Iop_Add32x4,
+                                   mkexpr(t1), mkexpr(t3)));
+                     break;
+                  }
+
+               case 0x03: { /* SRAR.D */
+                     DIP("SRAR.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1,
+                            binop(Iop_Sar64x2,
+                                  getWReg(ws),
+                                  getWReg(wt)));
+                     assign(t2,
+                            binop(Iop_Sub64x2,
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(64ul), mkU64(64ul)),
+                                  getWReg(wt)));
+                     assign(t4,
+                            unop(Iop_NotV128,
+                                 binop(Iop_CmpEQ64x2,
+                                       binop(Iop_ShlN64x2,
+                                             getWReg(wt),
+                                             mkU8(58)),
+                                       binop(Iop_64HLtoV128,
+                                             mkU64(0), mkU64(0)))));
+                     assign(t3,
+                            binop(Iop_ShrN64x2,
+                                  binop(Iop_AndV128,
+                                        binop(Iop_Shl64x2,
+                                              getWReg(ws),
+                                              mkexpr(t2)),
+                                        mkexpr(t4)),
+                                  mkU8(63)));
+                     putWReg(wd,
+                             binop(Iop_Add64x2,
+                                   mkexpr(t1), mkexpr(t3)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x02: { /* SRLR.df -- per-lane logical right shift of ws by wt
+                      with a one-bit rounding term added.  Every arm builds
+                        t1 = ws shifted right by wt          (Iop_ShrNxM)
+                        t2 = lane_width - wt                 (Iop_SubNxM)
+                        t4 = NOT((wt shl k) == 0), where the shl by k drops
+                             all but the low log2(lane_width) bits of wt,
+                             so t4 marks lanes whose low shift-amount bits
+                             are non-zero
+                        t3 = ((ws shl t2) AND t4) shr (lane_width - 1)
+                      and writes t1 + t3 to wd.  For 1 <= wt < lane_width,
+                      ws shl t2 places bit (wt - 1) of ws at the top of
+                      the lane and the final shift brings it down to bit 0.
+                      NOTE(review): this presumes the variable shifts use
+                      the amount modulo the lane width -- confirm. */
+            switch (df) {
+               case 0x00: { /* SRLR.B */
+                     DIP("SRLR.B w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1,
+                            binop(Iop_Shr8x16,
+                                  getWReg(ws),
+                                  getWReg(wt)));
+                     assign(t2,
+                            binop(Iop_Sub8x16,
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x808080808080808ull), /* 8 in every byte lane */
+                                        mkU64(0x808080808080808ull)),
+                                  getWReg(wt)));
+                     assign(t4,
+                            unop(Iop_NotV128,
+                                 binop(Iop_CmpEQ8x16,
+                                       binop(Iop_ShlN8x16,
+                                             getWReg(wt),
+                                             mkU8(5)), /* keeps only the low 3 bits of wt */
+                                       binop(Iop_64HLtoV128,
+                                             mkU64(0), mkU64(0)))));
+                     assign(t3,
+                            binop(Iop_ShrN8x16,
+                                  binop(Iop_AndV128,
+                                        binop(Iop_Shl8x16,
+                                              getWReg(ws),
+                                              mkexpr(t2)),
+                                        mkexpr(t4)),
+                                  mkU8(7))); /* top bit of each lane down to bit 0 */
+                     putWReg(wd,
+                             binop(Iop_Add8x16,
+                                   mkexpr(t1), mkexpr(t3)));
+                     break;
+                  }
+
+               case 0x01: { /* SRLR.H */
+                     DIP("SRLR.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1,
+                            binop(Iop_Shr16x8,
+                                  getWReg(ws),
+                                  getWReg(wt)));
+                     assign(t2,
+                            binop(Iop_Sub16x8,
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x10001000100010ul), /* 16 in every 16-bit lane */
+                                        mkU64(0x10001000100010ul)),
+                                  getWReg(wt)));
+                     assign(t4,
+                            unop(Iop_NotV128,
+                                 binop(Iop_CmpEQ16x8,
+                                       binop(Iop_ShlN16x8,
+                                             getWReg(wt),
+                                             mkU8(12)), /* keeps only the low 4 bits of wt */
+                                       binop(Iop_64HLtoV128,
+                                             mkU64(0), mkU64(0)))));
+                     assign(t3,
+                            binop(Iop_ShrN16x8,
+                                  binop(Iop_AndV128,
+                                        binop(Iop_Shl16x8,
+                                              getWReg(ws),
+                                              mkexpr(t2)),
+                                        mkexpr(t4)),
+                                  mkU8(15))); /* top bit of each lane down to bit 0 */
+                     putWReg(wd,
+                             binop(Iop_Add16x8,
+                                   mkexpr(t1), mkexpr(t3)));
+                     break;
+                  }
+
+               case 0x02: { /* SRLR.W */
+                     DIP("SRLR.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1,
+                            binop(Iop_Shr32x4,
+                                  getWReg(ws),
+                                  getWReg(wt)));
+                     assign(t2,
+                            binop(Iop_Sub32x4,
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x2000000020ul), /* 32 in every 32-bit lane */
+                                        mkU64(0x2000000020ul)),
+                                  getWReg(wt)));
+                     assign(t4,
+                            unop(Iop_NotV128,
+                                 binop(Iop_CmpEQ32x4,
+                                       binop(Iop_ShlN32x4,
+                                             getWReg(wt),
+                                             mkU8(27)), /* keeps only the low 5 bits of wt */
+                                       binop(Iop_64HLtoV128,
+                                             mkU64(0), mkU64(0)))));
+                     assign(t3,
+                            binop(Iop_ShrN32x4,
+                                  binop(Iop_AndV128,
+                                        binop(Iop_Shl32x4,
+                                              getWReg(ws),
+                                              mkexpr(t2)),
+                                        mkexpr(t4)),
+                                  mkU8(31))); /* top bit of each lane down to bit 0 */
+                     putWReg(wd,
+                             binop(Iop_Add32x4,
+                                   mkexpr(t1), mkexpr(t3)));
+                     break;
+                  }
+
+               case 0x03: { /* SRLR.D */
+                     DIP("SRLR.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1,
+                            binop(Iop_Shr64x2,
+                                  getWReg(ws),
+                                  getWReg(wt)));
+                     assign(t2,
+                            binop(Iop_Sub64x2,
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(64ul), mkU64(64ul)), /* 64 in both lanes */
+                                  getWReg(wt)));
+                     assign(t4,
+                            unop(Iop_NotV128,
+                                 binop(Iop_CmpEQ64x2,
+                                       binop(Iop_ShlN64x2,
+                                             getWReg(wt),
+                                             mkU8(58)), /* keeps only the low 6 bits of wt */
+                                       binop(Iop_64HLtoV128,
+                                             mkU64(0), mkU64(0)))));
+                     assign(t3,
+                            binop(Iop_ShrN64x2,
+                                  binop(Iop_AndV128,
+                                        binop(Iop_Shl64x2,
+                                              getWReg(ws),
+                                              mkexpr(t2)),
+                                        mkexpr(t4)),
+                                  mkU8(63))); /* top bit of each lane down to bit 0 */
+                     putWReg(wd,
+                             binop(Iop_Add64x2,
+                                   mkexpr(t1), mkexpr(t3)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x04: { /* HADD_S.df -- each result lane is the upper half of the
+                      matching ws lane plus the lower half of the matching
+                      wt lane.  Both halves are widened with arithmetic
+                      shifts: SarN by half the lane width for ws, ShlN then
+                      SarN by half the lane width for wt.  Only df 1..3
+                      (H, W, D) are decoded; any other df returns -1. */
+            switch (df) {
+               case 0x01: { /* HADD_S.H */
+                     DIP("HADD_S.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_Add16x8,
+                                  binop(Iop_SarN16x8, /* upper byte of each ws lane */
+                                        mkexpr(t1), mkU8(8)),
+                                  binop(Iop_SarN16x8, /* lower byte of each wt lane */
+                                        binop(Iop_ShlN16x8,
+                                              mkexpr(t2), mkU8(8)),
+                                        mkU8(8))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* HADD_S.W */
+                     DIP("HADD_S.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_Add32x4,
+                                  binop(Iop_SarN32x4, /* upper 16 bits of each ws lane */
+                                        mkexpr(t1), mkU8(16)),
+                                  binop(Iop_SarN32x4, /* lower 16 bits of each wt lane */
+                                        binop(Iop_ShlN32x4,
+                                              mkexpr(t2), mkU8(16)),
+                                        mkU8(16))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* HADD_S.D */
+                     DIP("HADD_S.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_Add64x2,
+                                  binop(Iop_SarN64x2, /* upper 32 bits of each ws lane */
+                                        mkexpr(t1), mkU8(32)),
+                                  binop(Iop_SarN64x2, /* lower 32 bits of each wt lane */
+                                        binop(Iop_ShlN64x2,
+                                              mkexpr(t2), mkU8(32)),
+                                        mkU8(32))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x05: { /* HADD_U.df -- each result lane is the upper half of the
+                      matching ws lane plus the lower half of the matching
+                      wt lane.  Both halves are widened with logical
+                      shifts: ShrN by half the lane width for ws, ShlN then
+                      ShrN by half the lane width for wt.  Only df 1..3
+                      (H, W, D) are decoded; any other df returns -1. */
+            switch (df) {
+               case 0x01: { /* HADD_U.H */
+                     DIP("HADD_U.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_Add16x8,
+                                  binop(Iop_ShrN16x8, /* upper byte of each ws lane */
+                                        mkexpr(t1), mkU8(8)),
+                                  binop(Iop_ShrN16x8, /* lower byte of each wt lane */
+                                        binop(Iop_ShlN16x8,
+                                              mkexpr(t2), mkU8(8)),
+                                        mkU8(8))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* HADD_U.W */
+                     DIP("HADD_U.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_Add32x4,
+                                  binop(Iop_ShrN32x4, /* upper 16 bits of each ws lane */
+                                        mkexpr(t1), mkU8(16)),
+                                  binop(Iop_ShrN32x4, /* lower 16 bits of each wt lane */
+                                        binop(Iop_ShlN32x4,
+                                              mkexpr(t2), mkU8(16)),
+                                        mkU8(16))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* HADD_U.D */
+                     DIP("HADD_U.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_Add64x2,
+                                  binop(Iop_ShrN64x2, /* upper 32 bits of each ws lane */
+                                        mkexpr(t1), mkU8(32)),
+                                  binop(Iop_ShrN64x2, /* lower 32 bits of each wt lane */
+                                        binop(Iop_ShlN64x2,
+                                              mkexpr(t2), mkU8(32)),
+                                        mkU8(32))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x06: { /* HSUB_S.df -- each result lane is the upper half of the
+                      matching ws lane minus the lower half of the matching
+                      wt lane.  Both halves are widened with arithmetic
+                      shifts: SarN by half the lane width for ws, ShlN then
+                      SarN by half the lane width for wt.  Only df 1..3
+                      (H, W, D) are decoded; any other df returns -1. */
+            switch (df) {
+               case 0x01: { /* HSUB_S.H */
+                     DIP("HSUB_S.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_Sub16x8,
+                                  binop(Iop_SarN16x8, /* upper byte of each ws lane */
+                                        mkexpr(t1), mkU8(8)),
+                                  binop(Iop_SarN16x8, /* lower byte of each wt lane */
+                                        binop(Iop_ShlN16x8,
+                                              mkexpr(t2), mkU8(8)),
+                                        mkU8(8))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* HSUB_S.W */
+                     DIP("HSUB_S.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_Sub32x4,
+                                  binop(Iop_SarN32x4, /* upper 16 bits of each ws lane */
+                                        mkexpr(t1), mkU8(16)),
+                                  binop(Iop_SarN32x4, /* lower 16 bits of each wt lane */
+                                        binop(Iop_ShlN32x4,
+                                              mkexpr(t2), mkU8(16)),
+                                        mkU8(16))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* HSUB_S.D */
+                     DIP("HSUB_S.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_Sub64x2,
+                                  binop(Iop_SarN64x2, /* upper 32 bits of each ws lane */
+                                        mkexpr(t1), mkU8(32)),
+                                  binop(Iop_SarN64x2, /* lower 32 bits of each wt lane */
+                                        binop(Iop_ShlN64x2,
+                                              mkexpr(t2), mkU8(32)),
+                                        mkU8(32))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x07: { /* HSUB_U.df -- each result lane is the upper half of the
+                      matching ws lane minus the lower half of the matching
+                      wt lane.  Both halves are widened with logical
+                      shifts: ShrN by half the lane width for ws, ShlN then
+                      ShrN by half the lane width for wt.  Only df 1..3
+                      (H, W, D) are decoded; any other df returns -1. */
+            switch (df) {
+               case 0x01: { /* HSUB_U.H */
+                     DIP("HSUB_U.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_Sub16x8,
+                                  binop(Iop_ShrN16x8, /* upper byte of each ws lane */
+                                        mkexpr(t1), mkU8(8)),
+                                  binop(Iop_ShrN16x8, /* lower byte of each wt lane */
+                                        binop(Iop_ShlN16x8,
+                                              mkexpr(t2), mkU8(8)),
+                                        mkU8(8))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x02: { /* HSUB_U.W */
+                     DIP("HSUB_U.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_Sub32x4,
+                                  binop(Iop_ShrN32x4, /* upper 16 bits of each ws lane */
+                                        mkexpr(t1), mkU8(16)),
+                                  binop(Iop_ShrN32x4, /* lower 16 bits of each wt lane */
+                                        binop(Iop_ShlN32x4,
+                                              mkexpr(t2), mkU8(16)),
+                                        mkU8(16))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               case 0x03: { /* HSUB_U.D */
+                     DIP("HSUB_U.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+                     assign(t2, getWReg(wt));
+                     assign(t3,
+                            binop(Iop_Sub64x2,
+                                  binop(Iop_ShrN64x2, /* upper 32 bits of each ws lane */
+                                        mkexpr(t1), mkU8(32)),
+                                  binop(Iop_ShrN64x2, /* lower 32 bits of each wt lane */
+                                        binop(Iop_ShlN64x2,
+                                              mkexpr(t2), mkU8(32)),
+                                        mkU8(32))));
+                     putWReg(wd, mkexpr(t3));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      default:
+         return -1;
+   }
+
+   return 0;
+}
+
+static Int msa_3R_1A(UInt cins, UChar wd, UChar ws) { /* 3R (0x1A) */
+   UShort operation;
+   UChar df, wt;
+
+   operation = (cins & 0x03C00000) >> 22;
+   df = (cins & 0x00200000) >> 21;
+   wt = (cins & 0x001F0000) >> 16;
+
+   switch (operation) {
+      case 0x00: { /* FCAF.df -- calls calculateMSACSR(ws, wt, FCAFW or
+                      FCAFD, 2), then writes an all-zero vector to wd
+                      regardless of the operand values.  df 0 is the .W
+                      form, df 1 the .D form.
+                      NOTE(review): the trailing 2 is presumably the
+                      operand count -- confirm against calculateMSACSR. */
+            switch (df) {
+               case 0x00: { /* FCAF.W */
+                     DIP("FCAF.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FCAFW, 2);
+                     putWReg(wd, binop(Iop_64HLtoV128, mkU64(0ul), mkU64(0ul)));
+                     break;
+                  }
+
+               case 0x01: { /* FCAF.D */
+                     DIP("FCAF.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FCAFD, 2);
+                     putWReg(wd, binop(Iop_64HLtoV128, mkU64(0ul), mkU64(0ul)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x01: { /* FCUN.df -- calls calculateMSACSR with the FCUNW/FCUND
+                      tag, then writes the "unordered" compare of ws and
+                      wt to wd: Iop_CmpUN32Fx4 for df 0 (.W),
+                      Iop_CmpUN64Fx2 for df 1 (.D). */
+            switch (df) {
+               case 0x00: { /* FCUN.W */
+                     DIP("FCUN.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FCUNW, 2);
+                     putWReg(wd, binop(Iop_CmpUN32Fx4,
+                                       getWReg(ws),
+                                       getWReg(wt)));
+                     break;
+                  }
+
+               case 0x01: { /* FCUN.D */
+                     DIP("FCUN.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FCUND, 2);
+                     putWReg(wd, binop(Iop_CmpUN64Fx2,
+                                       getWReg(ws),
+                                       getWReg(wt)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x02: { /* FCEQ.df -- calls calculateMSACSR with the FCEQW/FCEQD
+                      tag, then writes the "equal" compare of ws and wt
+                      to wd: Iop_CmpEQ32Fx4 for df 0 (.W), Iop_CmpEQ64Fx2
+                      for df 1 (.D). */
+            switch (df) {
+               case 0x00: { /* FCEQ.W */
+                     DIP("FCEQ.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FCEQW, 2);
+                     putWReg(wd, binop(Iop_CmpEQ32Fx4,
+                                       getWReg(ws),
+                                       getWReg(wt)));
+                     break;
+                  }
+
+               case 0x01: { /* FCEQ.D */
+                     DIP("FCEQ.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FCEQD, 2);
+                     putWReg(wd, binop(Iop_CmpEQ64Fx2,
+                                       getWReg(ws),
+                                       getWReg(wt)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x03: { /* FCUEQ.df -- calls calculateMSACSR with the
+                      FCUEQW/FCUEQD tag, then writes to wd the bitwise OR
+                      of the "equal" and "unordered" compares of ws and
+                      wt (32Fx4 ops for df 0, 64Fx2 ops for df 1). */
+            switch (df) {
+               case 0x00: { /* FCUEQ.W */
+                     DIP("FCUEQ.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FCUEQW, 2);
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_CmpEQ32Fx4,
+                                         getWReg(ws),
+                                         getWReg(wt)),
+                                   binop(Iop_CmpUN32Fx4,
+                                         getWReg(ws),
+                                         getWReg(wt))));
+                     break;
+                  }
+
+               case 0x01: { /* FCUEQ.D */
+                     DIP("FCUEQ.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FCUEQD, 2);
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_CmpEQ64Fx2,
+                                         getWReg(ws),
+                                         getWReg(wt)),
+                                   binop(Iop_CmpUN64Fx2,
+                                         getWReg(ws),
+                                         getWReg(wt))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x04: { /* FCLT.df -- calls calculateMSACSR with the FCLTW/FCLTD
+                      tag, then writes the "less than" compare of ws
+                      against wt to wd: Iop_CmpLT32Fx4 for df 0 (.W),
+                      Iop_CmpLT64Fx2 for df 1 (.D). */
+            switch (df) {
+               case 0x00: { /* FCLT.W */
+                     DIP("FCLT.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FCLTW, 2);
+                     putWReg(wd,
+                             binop(Iop_CmpLT32Fx4,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               case 0x01: { /* FCLT.D */
+                     DIP("FCLT.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FCLTD, 2);
+                     putWReg(wd,
+                             binop(Iop_CmpLT64Fx2,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x05: { /* FCULT.df -- calls calculateMSACSR with the
+                      FCULTW/FCULTD tag, then writes to wd the bitwise OR
+                      of the "less than" and "unordered" compares of ws
+                      and wt (32Fx4 ops for df 0, 64Fx2 ops for df 1). */
+            switch (df) {
+               case 0x00: { /* FCULT.W */
+                     DIP("FCULT.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FCULTW, 2);
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_CmpLT32Fx4,
+                                         getWReg(ws),
+                                         getWReg(wt)),
+                                   binop(Iop_CmpUN32Fx4,
+                                         getWReg(ws),
+                                         getWReg(wt))));
+                     break;
+                  }
+
+               case 0x01: { /* FCULT.D */
+                     DIP("FCULT.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FCULTD, 2);
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_CmpLT64Fx2,
+                                         getWReg(ws),
+                                         getWReg(wt)),
+                                   binop(Iop_CmpUN64Fx2,
+                                         getWReg(ws),
+                                         getWReg(wt))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x06: { /* FCLE.df -- calls calculateMSACSR with the FCLEW/FCLED
+                      tag, then writes the "less than or equal" compare
+                      of ws against wt to wd: Iop_CmpLE32Fx4 for df 0
+                      (.W), Iop_CmpLE64Fx2 for df 1 (.D). */
+            switch (df) {
+               case 0x00: { /* FCLE.W */
+                     DIP("FCLE.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FCLEW, 2);
+                     putWReg(wd,
+                             binop(Iop_CmpLE32Fx4,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               case 0x01: { /* FCLE.D */
+                     DIP("FCLE.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FCLED, 2);
+                     putWReg(wd,
+                             binop(Iop_CmpLE64Fx2,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x07: { /* FCULE.df -- calls calculateMSACSR with the
+                      FCULEW/FCULED tag, then writes to wd the bitwise OR
+                      of the "less than or equal" and "unordered"
+                      compares of ws and wt (32Fx4 ops for df 0, 64Fx2
+                      ops for df 1). */
+            switch (df) {
+               case 0x00: { /* FCULE.W */
+                     DIP("FCULE.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FCULEW, 2);
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_CmpLE32Fx4,
+                                         getWReg(ws),
+                                         getWReg(wt)),
+                                   binop(Iop_CmpUN32Fx4,
+                                         getWReg(ws),
+                                         getWReg(wt))));
+                     break;
+                  }
+
+               case 0x01: { /* FCULE.D */
+                     DIP("FCULE.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FCULED, 2);
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_CmpLE64Fx2,
+                                         getWReg(ws),
+                                         getWReg(wt)),
+                                   binop(Iop_CmpUN64Fx2,
+                                         getWReg(ws),
+                                         getWReg(wt))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x08: { /* FSAF.df -- calls calculateMSACSR with the FSAFW/FSAFD
+                      tag, then writes an all-zero vector to wd regardless
+                      of the operand values.  The value written is the
+                      same as in the FCAF.df arm of this switch; only the
+                      tag passed to calculateMSACSR differs. */
+            switch (df) {
+               case 0x00: { /* FSAF.W */
+                     DIP("FSAF.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FSAFW, 2);
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   mkU64(0ul), mkU64(0ul)));
+                     break;
+                  }
+
+               case 0x01: { /* FSAF.D */
+                     DIP("FSAF.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FSAFD, 2);
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   mkU64(0ul), mkU64(0ul)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x09: { /* FSUN.df -- calls calculateMSACSR with the FSUNW/FSUND
+                      tag, then writes the "unordered" compare of ws and
+                      wt to wd (Iop_CmpUN32Fx4 for df 0, Iop_CmpUN64Fx2
+                      for df 1).  The value written is the same as in the
+                      FCUN.df arm of this switch; only the tag passed to
+                      calculateMSACSR differs. */
+            switch (df) {
+               case 0x00: { /* FSUN.W */
+                     DIP("FSUN.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FSUNW, 2);
+                     putWReg(wd,
+                             binop(Iop_CmpUN32Fx4,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               case 0x01: { /* FSUN.D */
+                     DIP("FSUN.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FSUND, 2);
+                     putWReg(wd,
+                             binop(Iop_CmpUN64Fx2,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x0A: { /* FSEQ.df -- calls calculateMSACSR with the FSEQW/FSEQD
+                      tag, then writes the "equal" compare of ws and wt
+                      to wd (Iop_CmpEQ32Fx4 for df 0, Iop_CmpEQ64Fx2 for
+                      df 1).  The value written is the same as in the
+                      FCEQ.df arm of this switch; only the tag passed to
+                      calculateMSACSR differs. */
+            switch (df) {
+               case 0x00: { /* FSEQ.W */
+                     DIP("FSEQ.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FSEQW, 2);
+                     putWReg(wd,
+                             binop(Iop_CmpEQ32Fx4,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               case 0x01: { /* FSEQ.D */
+                     DIP("FSEQ.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FSEQD, 2);
+                     putWReg(wd,
+                             binop(Iop_CmpEQ64Fx2,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x0B: { /* FSUEQ.df -- calls calculateMSACSR with the
+                      FSUEQW/FSUEQD tag, then writes to wd the bitwise OR
+                      of the "equal" and "unordered" compares of ws and
+                      wt.  The value written is the same as in the
+                      FCUEQ.df arm of this switch; only the tag passed to
+                      calculateMSACSR differs. */
+            switch (df) {
+               case 0x00: { /* FSUEQ.W */
+                     DIP("FSUEQ.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FSUEQW, 2);
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_CmpEQ32Fx4,
+                                         getWReg(ws),
+                                         getWReg(wt)),
+                                   binop(Iop_CmpUN32Fx4,
+                                         getWReg(ws),
+                                         getWReg(wt))));
+                     break;
+                  }
+
+               case 0x01: { /* FSUEQ.D */
+                     DIP("FSUEQ.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FSUEQD, 2);
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_CmpEQ64Fx2,
+                                         getWReg(ws),
+                                         getWReg(wt)),
+                                   binop(Iop_CmpUN64Fx2,
+                                         getWReg(ws),
+                                         getWReg(wt))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x0C: { /* FSLT.df -- calls calculateMSACSR with the FSLTW/FSLTD
+                      tag, then writes the "less than" compare of ws
+                      against wt to wd (Iop_CmpLT32Fx4 for df 0,
+                      Iop_CmpLT64Fx2 for df 1).  The value written is the
+                      same as in the FCLT.df arm of this switch; only the
+                      tag passed to calculateMSACSR differs. */
+            switch (df) {
+               case 0x00: { /* FSLT.W */
+                     DIP("FSLT.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FSLTW, 2);
+                     putWReg(wd,
+                             binop(Iop_CmpLT32Fx4,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               case 0x01: { /* FSLT.D */
+                     DIP("FSLT.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FSLTD, 2);
+                     putWReg(wd,
+                             binop(Iop_CmpLT64Fx2,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x0D: { /* FSULT.df */
+            switch (df) {
+               case 0x00: { /* FSULT.W */
+                     DIP("FSULT.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FSULTW, 2);
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_CmpLT32Fx4,
+                                         getWReg(ws),
+                                         getWReg(wt)),
+                                   binop(Iop_CmpUN32Fx4,
+                                         getWReg(ws),
+                                         getWReg(wt))));
+                     break;
+                  }
+
+               case 0x01: { /* FSULT.D */
+                     DIP("FSULT.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FSULTD, 2);
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_CmpLT64Fx2,
+                                         getWReg(ws),
+                                         getWReg(wt)),
+                                   binop(Iop_CmpUN64Fx2,
+                                         getWReg(ws),
+                                         getWReg(wt))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x0E: { /* FSLE.df */
+            switch (df) {
+               case 0x00: { /* FSLE.W */
+                     DIP("FSLE.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FSLEW, 2);
+                     putWReg(wd,
+                             binop(Iop_CmpLE32Fx4,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               case 0x01: { /* FSLE.D */
+                     DIP("FSLE.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FSLED, 2);
+                     putWReg(wd,
+                             binop(Iop_CmpLE64Fx2,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x0F: { /* FSULE.df */
+            switch (df) {
+               case 0x00: { /* FSULE.W */
+                     DIP("FSULE.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FSULEW, 2);
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_CmpLE32Fx4,
+                                         getWReg(ws),
+                                         getWReg(wt)),
+                                   binop(Iop_CmpUN32Fx4,
+                                         getWReg(ws),
+                                         getWReg(wt))));
+                     break;
+                  }
+
+               case 0x01: { /* FSULE.D */
+                     DIP("FSULE.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FSULED, 2);
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_CmpLE64Fx2,
+                                         getWReg(ws),
+                                         getWReg(wt)),
+                                   binop(Iop_CmpUN64Fx2,
+                                         getWReg(ws),
+                                         getWReg(wt))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      default:
+         return -1;
+   }
+
+   return 0;
+}
+
+static Int msa_3R_1B(UInt cins, UChar wd, UChar ws) { /* 3R (0x1B) */
+   IRTemp t1, t2, t3, t4;
+   UShort operation;
+   UChar df, wt;
+
+   operation = (cins & 0x03C00000) >> 22;
+   df = (cins & 0x00200000) >> 21;
+   wt = (cins & 0x001F0000) >> 16;
+
+   switch (operation) {
+      case 0x00: { /* FADD.df */
+            switch (df) {
+               case 0x00: { /* FADD.W */
+                     DIP("FADD.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FADDW, 2);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     putWReg(wd,
+                             triop(Iop_Add32Fx4, rm,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               case 0x01: { /* FADD.D */
+                     DIP("FADD.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FADDD, 2);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     putWReg(wd,
+                             triop(Iop_Add64Fx2, rm,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x01: { /* FSUB.df */
+            switch (df) {
+               case 0x00: { /* FSUB.W */
+                     DIP("FSUB.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FSUBW, 2);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     putWReg(wd,
+                             triop(Iop_Sub32Fx4, rm,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               case 0x01: { /* FSUB.D */
+                     DIP("FSUB.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FSUBD, 2);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     putWReg(wd,
+                             triop(Iop_Sub64Fx2, rm,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x02: { /* FMUL.df */
+            switch (df) {
+               case 0x00: { /* FMUL.W */
+                     DIP("FMUL.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FMULW, 2);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     putWReg(wd,
+                             triop(Iop_Mul32Fx4, rm,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               case 0x01: { /* FMUL.D */
+                     DIP("FMUL.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FMULW, 2);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     putWReg(wd,
+                             triop(Iop_Mul64Fx2, rm,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x03: { /* FDIV.df */
+            switch (df) {
+               case 0x00: { /* FDIV.W */
+                     DIP("FDIV.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FDIVW, 2);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     putWReg(wd,
+                             triop(Iop_Div32Fx4, rm,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               case 0x01: { /* FDIV.D */
+                     DIP("FDIV.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FDIVD, 2);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     putWReg(wd,
+                             triop(Iop_Div64Fx2, rm,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x04: { /* FMADD.df */
+            switch (df) {
+               case 0x00: { /* FMADD.W */
+                     DIP("FMADD.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FMADDW, 2);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     IRTemp tmp[4];
+                     Int i;
+
+                     for (i = 0; i < 4; i++) {
+                        tmp[i] = newTemp(Ity_F32);
+                        assign(tmp[i],
+                               qop(Iop_MAddF32, rm,
+                                   unop(Iop_ReinterpI32asF32,
+                                        binop(Iop_GetElem32x4,
+                                              getWReg(ws),
+                                              mkU8(i))),
+                                   unop(Iop_ReinterpI32asF32,
+                                        binop(Iop_GetElem32x4,
+                                              getWReg(wt),
+                                              mkU8(i))),
+                                   unop(Iop_ReinterpI32asF32,
+                                        binop(Iop_GetElem32x4,
+                                              getWReg(wd),
+                                              mkU8(i)))));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_32HLto64,
+                                         unop(Iop_ReinterpF32asI32,
+                                              mkexpr(tmp[3])),
+                                         unop(Iop_ReinterpF32asI32,
+                                              mkexpr(tmp[2]))),
+                                   binop(Iop_32HLto64,
+                                         unop(Iop_ReinterpF32asI32,
+                                              mkexpr(tmp[1])),
+                                         unop(Iop_ReinterpF32asI32,
+                                              mkexpr(tmp[0])))));
+                     break;
+                  }
+
+               case 0x01: { /* FMADD.D */
+                     DIP("FMADD.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FMADDW, 2);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     IRTemp tmp[2];
+                     Int i;
+
+                     for (i = 0; i < 2; i++) {
+                        tmp[i] = newTemp(Ity_F64);
+                        assign(tmp[i],
+                               qop(Iop_MAddF64, rm,
+                                   unop(Iop_ReinterpI64asF64,
+                                        binop(Iop_GetElem64x2,
+                                              getWReg(ws),
+                                              mkU8(i))),
+                                   unop(Iop_ReinterpI64asF64,
+                                        binop(Iop_GetElem64x2,
+                                              getWReg(wt),
+                                              mkU8(i))),
+                                   unop(Iop_ReinterpI64asF64,
+                                        binop(Iop_GetElem64x2,
+                                              getWReg(wd),
+                                              mkU8(i)))));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   unop(Iop_ReinterpF64asI64,
+                                        mkexpr(tmp[1])),
+                                   unop(Iop_ReinterpF64asI64,
+                                        mkexpr(tmp[0]))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x05: { /* FMSUB.df */
+            switch (df) {
+               case 0x00: { /* FMSUB.W */
+                     DIP("FMSUB.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FMADDW, 2);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     IRTemp tmp[4];
+                     Int i;
+
+                     for (i = 0; i < 4; i++) {
+                        tmp[i] = newTemp(Ity_F32);
+                        assign(tmp[i],
+                               qop(Iop_MSubF32, rm,
+                                   unop(Iop_ReinterpI32asF32,
+                                        binop(Iop_GetElem32x4,
+                                              getWReg(ws),
+                                              mkU8(i))),
+                                   unop(Iop_ReinterpI32asF32,
+                                        binop(Iop_GetElem32x4,
+                                              getWReg(wt),
+                                              mkU8(i))),
+                                   unop(Iop_ReinterpI32asF32,
+                                        binop(Iop_GetElem32x4,
+                                              getWReg(wd),
+                                              mkU8(i)))));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_32HLto64,
+                                         unop(Iop_ReinterpF32asI32,
+                                              mkexpr(tmp[3])),
+                                         unop(Iop_ReinterpF32asI32,
+                                              mkexpr(tmp[2]))),
+                                   binop(Iop_32HLto64,
+                                         unop(Iop_ReinterpF32asI32,
+                                              mkexpr(tmp[1])),
+                                         unop(Iop_ReinterpF32asI32,
+                                              mkexpr(tmp[0])))));
+                     break;
+                  }
+
+               case 0x01: { /* FMSUB.D */
+                     DIP("FMSUB.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FMADDD, 2);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     IRTemp tmp[2];
+                     Int i;
+
+                     for (i = 0; i < 2; i++) {
+                        tmp[i] = newTemp(Ity_F64);
+                        assign(tmp[i],
+                               qop(Iop_MSubF64, rm,
+                                   unop(Iop_ReinterpI64asF64,
+                                        binop(Iop_GetElem64x2,
+                                              getWReg(ws),
+                                              mkU8(i))),
+                                   unop(Iop_ReinterpI64asF64,
+                                        binop(Iop_GetElem64x2,
+                                              getWReg(wt),
+                                              mkU8(i))),
+                                   unop(Iop_ReinterpI64asF64,
+                                        binop(Iop_GetElem64x2,
+                                              getWReg(wd),
+                                              mkU8(i)))));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   unop(Iop_ReinterpF64asI64,
+                                        mkexpr(tmp[1])),
+                                   unop(Iop_ReinterpF64asI64,
+                                        mkexpr(tmp[0]))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x07: { /* FEXP2.df */
+            switch (df) {
+               case 0x00: { /* FEXP2.W */
+                     DIP("FEXP2.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FEXP2W, 2);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     putWReg(wd,
+                             triop(Iop_Scale2_32Fx4, rm,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               case 0x01: { /* FEXP2.D */
+                     DIP("FEXP2.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FEXP2D, 2);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     putWReg(wd,
+                             triop(Iop_Scale2_64Fx2, rm,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x08: { /* FEXDO.df */
+            switch (df) {
+               case 0x00: { /* FEXDO.H */
+                     DIP("FEXDO.H w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FEXDOH, 2);
+                     t1 = newTemp(Ity_I64);
+                     t2 = newTemp(Ity_I64);
+                     assign(t1,
+                            unop(Iop_F32toF16x4,
+                                 getWReg(ws)));
+                     assign(t2,
+                            unop(Iop_F32toF16x4,
+                                 getWReg(wt)));
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   mkexpr(t1), mkexpr(t2)));
+                     break;
+                  }
+
+               case 0x01: { /* FEXDO.W */
+                     DIP("FEXDO.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FEXDOW, 2);
+                     t1 = newTemp(Ity_I32);
+                     t2 = newTemp(Ity_I32);
+                     t3 = newTemp(Ity_I32);
+                     t4 = newTemp(Ity_I32);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     assign(t1,
+                            unop(Iop_ReinterpF32asI32,
+                                 binop(Iop_F64toF32, rm,
+                                       unop(Iop_ReinterpI64asF64,
+                                            unop(Iop_V128to64,
+                                                 getWReg(ws))))));
+                     assign(t2,
+                            unop(Iop_ReinterpF32asI32,
+                                 binop(Iop_F64toF32, rm,
+                                       unop(Iop_ReinterpI64asF64,
+                                            unop(Iop_V128HIto64,
+                                                 getWReg(ws))))));
+                     assign(t3,
+                            unop(Iop_ReinterpF32asI32,
+                                 binop(Iop_F64toF32, rm,
+                                       unop(Iop_ReinterpI64asF64,
+                                            unop(Iop_V128to64,
+                                                 getWReg(wt))))));
+                     assign(t4,
+                            unop(Iop_ReinterpF32asI32,
+                                 binop(Iop_F64toF32, rm,
+                                       unop(Iop_ReinterpI64asF64,
+                                            unop(Iop_V128HIto64,
+                                                 getWReg(wt))))));
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_32HLto64,
+                                         mkexpr(t2), mkexpr(t1)),
+                                   binop(Iop_32HLto64,
+                                         mkexpr(t4), mkexpr(t3))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x0A: { /* FTQ.df */
+            switch (df) {
+               case 0x00: { /* FTQ.H */
+                     DIP("FTQ.H w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FTQH, 2);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     putWReg(wd,
+                             triop(Iop_F32x4_2toQ16x8, rm,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               case 0x01: { /* FTQ.W */
+                     DIP("FTQ.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FTQW, 2);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     putWReg(wd,
+                             triop(Iop_F64x2_2toQ32x4, rm,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x0C: { /* FMIN.df */
+            switch (df) {
+               case 0x00: { /* FMIN.W */
+                     DIP("FMIN.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FMINW, 2);
+                     putWReg(wd,
+                             binop(Iop_Min32Fx4,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               case 0x01: { /* FMIN.D */
+                     DIP("FMIN.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FMINW, 2);
+                     putWReg(wd,
+                             binop(Iop_Min64Fx2,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x0D: { /* FMIN_A.df */
+            switch (df) {
+               case 0x00: { /* FMIN_A.W */
+                     DIP("FMIN_A.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FMINAW, 2);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1,
+                            binop(Iop_AndV128,
+                                  getWReg(ws),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x7FFFFFFF7FFFFFFF),
+                                        mkU64(0x7FFFFFFF7FFFFFFF))));
+                     assign(t2,
+                            binop(Iop_AndV128,
+                                  getWReg(wt),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x7FFFFFFF7FFFFFFF),
+                                        mkU64(0x7FFFFFFF7FFFFFFF))));
+                     assign(t3,
+                            binop(Iop_Min32Fx4,
+                                  mkexpr(t2), mkexpr(t1)));
+                     assign(t4,
+                         binop(Iop_AndV128,
+                            binop(Iop_AndV128,
+                                  unop(Iop_NotV128,
+                                       binop(Iop_CmpUN32Fx4,
+                                             mkexpr(t3),
+                                             mkexpr(t3))),
+                                  binop(Iop_OrV128,
+                                     binop(Iop_AndV128,
+                                           binop(Iop_CmpEQ32Fx4,
+                                                 mkexpr(t1),
+                                                 mkexpr(t2)),
+                                           binop(Iop_OrV128,
+                                                 getWReg(ws),
+                                                 getWReg(wt))),
+                                     binop(Iop_OrV128,
+                                           binop(Iop_AndV128,
+                                              binop(Iop_OrV128,
+                                                 binop(Iop_CmpUN32Fx4,
+                                                       mkexpr(t1),
+                                                       mkexpr(t1)),
+                                                 binop(Iop_CmpLT32Fx4,
+                                                       mkexpr(t3),
+                                                       mkexpr(t1))),
+                                           getWReg(wt)),
+                                           binop(Iop_AndV128,
+                                              binop(Iop_OrV128,
+                                                 binop(Iop_CmpUN32Fx4,
+                                                       mkexpr(t2),
+                                                       mkexpr(t2)),
+                                                 binop(Iop_CmpLT32Fx4,
+                                                       mkexpr(t3),
+                                                       mkexpr(t2))),
+                                                 getWReg(ws))))),
+                            binop(Iop_64HLtoV128,
+                                  mkU64(0x8000000080000000),
+                                  mkU64(0x8000000080000000))));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   mkexpr(t3), mkexpr(t4)));
+                     break;
+                  }
+
+               case 0x01: { /* FMIN_A.D */
+                     DIP("FMIN_A.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FMINAD, 2);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1,
+                            binop(Iop_AndV128,
+                                  getWReg(ws),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x7FFFFFFFFFFFFFFF),
+                                        mkU64(0x7FFFFFFFFFFFFFFF))));
+                     assign(t2,
+                            binop(Iop_AndV128,
+                                  getWReg(wt),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x7FFFFFFFFFFFFFFF),
+                                        mkU64(0x7FFFFFFFFFFFFFFF))));
+                     assign(t3,
+                            binop(Iop_Min64Fx2,
+                                  mkexpr(t2), mkexpr(t1)));
+                     assign(t4,
+                         binop(Iop_AndV128,
+                            binop(Iop_AndV128,
+                                  unop(Iop_NotV128,
+                                       binop(Iop_CmpUN64Fx2,
+                                             mkexpr(t3),
+                                             mkexpr(t3))),
+                                  binop(Iop_OrV128,
+                                     binop(Iop_AndV128,
+                                           binop(Iop_CmpEQ64Fx2,
+                                                 mkexpr(t1),
+                                                 mkexpr(t2)),
+                                           binop(Iop_OrV128,
+                                                 getWReg(ws),
+                                                 getWReg(wt))),
+                                     binop(Iop_OrV128,
+                                        binop(Iop_AndV128,
+                                              binop(Iop_OrV128,
+                                                 binop(Iop_CmpUN64Fx2,
+                                                       mkexpr(t1),
+                                                       mkexpr(t1)),
+                                                 binop(Iop_CmpLT64Fx2,
+                                                       mkexpr(t3),
+                                                       mkexpr(t1))),
+                                              getWReg(wt)),
+                                        binop(Iop_AndV128,
+                                              binop(Iop_OrV128,
+                                                 binop(Iop_CmpUN64Fx2,
+                                                       mkexpr(t2),
+                                                       mkexpr(t2)),
+                                                 binop(Iop_CmpLT64Fx2,
+                                                       mkexpr(t3),
+                                                       mkexpr(t2))),
+                                              getWReg(ws))))),
+                            binop(Iop_64HLtoV128,
+                                  mkU64(0x8000000000000000),
+                                  mkU64(0x8000000000000000))));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   mkexpr(t3), mkexpr(t4)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x0E: { /* FMAX.df */
+            switch (df) {
+               case 0x00: { /* FMAX.W */
+                     DIP("FMAX.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FMAXW, 2);
+                     putWReg(wd,
+                             binop(Iop_Max32Fx4,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               case 0x01: { /* FMAX.D */
+                     DIP("FMAX.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FMAXW, 2);
+                     putWReg(wd,
+                             binop(Iop_Max64Fx2,
+                                   getWReg(ws),
+                                   getWReg(wt)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x0F: { /* FMAX_A.df: maximum chosen on sign-cleared operands */
+            switch (df) {
+               case 0x00: { /* FMAX_A.W */
+                     DIP("FMAX_A.W w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FMAXAW, 2);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1, /* ws with bit 31 of every 32-bit lane cleared */
+                            binop(Iop_AndV128,
+                                  getWReg(ws),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x7FFFFFFF7FFFFFFF),
+                                        mkU64(0x7FFFFFFF7FFFFFFF))));
+                     assign(t2, /* wt with bit 31 of every 32-bit lane cleared */
+                            binop(Iop_AndV128,
+                                  getWReg(wt),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x7FFFFFFF7FFFFFFF),
+                                        mkU64(0x7FFFFFFF7FFFFFFF))));
+                     assign(t3, /* per-lane maximum of the two sign-cleared values */
+                            binop(Iop_Max32Fx4,
+                                  mkexpr(t2), mkexpr(t1)));
+                     assign(t4, /* only bit 31 of each lane survives; 0 where t3 is unordered */
+                         binop(Iop_AndV128,
+                            binop(Iop_AndV128,
+                                  unop(Iop_NotV128,
+                                       binop(Iop_CmpUN32Fx4,
+                                             mkexpr(t3),
+                                             mkexpr(t3))),
+                                  binop(Iop_OrV128,
+                                     binop(Iop_AndV128, /* t1 == t2: ws AND wt */
+                                           binop(Iop_CmpEQ32Fx4,
+                                                 mkexpr(t1),
+                                                 mkexpr(t2)),
+                                           binop(Iop_AndV128,
+                                                 getWReg(ws),
+                                                 getWReg(wt))),
+                                     binop(Iop_OrV128,
+                                        binop(Iop_AndV128, /* t1 unordered or t1 < t3: take wt */
+                                              binop(Iop_OrV128,
+                                                 binop(Iop_CmpUN32Fx4,
+                                                       mkexpr(t1),
+                                                       mkexpr(t1)),
+                                                 binop(Iop_CmpLT32Fx4,
+                                                       mkexpr(t1),
+                                                       mkexpr(t3))),
+                                              getWReg(wt)),
+                                        binop(Iop_AndV128, /* t2 unordered or t2 < t3: take ws */
+                                              binop(Iop_OrV128,
+                                                 binop(Iop_CmpUN32Fx4,
+                                                       mkexpr(t2),
+                                                       mkexpr(t2)),
+                                                 binop(Iop_CmpLT32Fx4,
+                                                       mkexpr(t2),
+                                                       mkexpr(t3))),
+                                              getWReg(ws))))),
+                            binop(Iop_64HLtoV128,
+                                  mkU64(0x8000000080000000),
+                                  mkU64(0x8000000080000000))));
+                     putWReg(wd, /* t3 supplies the low 31 bits, t4 supplies bit 31 */
+                             binop(Iop_OrV128,
+                                   mkexpr(t3), mkexpr(t4)));
+                     break;
+                  }
+
+               case 0x01: { /* FMAX_A.D */
+                     DIP("FMAX_A.D w%d, w%d, w%d", wd, ws, wt);
+                     calculateMSACSR(ws, wt, FMAXAD, 2);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     assign(t1, /* ws with bit 63 of every 64-bit lane cleared */
+                            binop(Iop_AndV128,
+                                  getWReg(ws),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x7FFFFFFFFFFFFFFF),
+                                        mkU64(0x7FFFFFFFFFFFFFFF))));
+                     assign(t2, /* wt with bit 63 of every 64-bit lane cleared */
+                            binop(Iop_AndV128,
+                                  getWReg(wt),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x7FFFFFFFFFFFFFFF),
+                                        mkU64(0x7FFFFFFFFFFFFFFF))));
+                     assign(t3, /* per-lane maximum of the two sign-cleared values */
+                            binop(Iop_Max64Fx2,
+                                  mkexpr(t2), mkexpr(t1)));
+                     assign(t4, /* only bit 63 of each lane survives; 0 where t3 is unordered */
+                         binop(Iop_AndV128,
+                            binop(Iop_AndV128,
+                                  unop(Iop_NotV128,
+                                       binop(Iop_CmpUN64Fx2,
+                                             mkexpr(t3),
+                                             mkexpr(t3))),
+                                  binop(Iop_OrV128,
+                                     binop(Iop_AndV128, /* t1 == t2: ws AND wt */
+                                           binop(Iop_CmpEQ64Fx2,
+                                                 mkexpr(t1),
+                                                 mkexpr(t2)),
+                                           binop(Iop_AndV128,
+                                                 getWReg(ws),
+                                                 getWReg(wt))),
+                                     binop(Iop_OrV128,
+                                           binop(Iop_AndV128, /* t1 unordered or t1 < t3: take wt */
+                                              binop(Iop_OrV128,
+                                                 binop(Iop_CmpUN64Fx2,
+                                                       mkexpr(t1),
+                                                       mkexpr(t1)),
+                                                 binop(Iop_CmpLT64Fx2,
+                                                       mkexpr(t1),
+                                                       mkexpr(t3))),
+                                                 getWReg(wt)),
+                                           binop(Iop_AndV128, /* t2 unordered or t2 < t3: take ws */
+                                              binop(Iop_OrV128,
+                                                 binop(Iop_CmpUN64Fx2,
+                                                       mkexpr(t2),
+                                                       mkexpr(t2)),
+                                                 binop(Iop_CmpLT64Fx2,
+                                                       mkexpr(t2),
+                                                       mkexpr(t3))),
+                                                 getWReg(ws))))),
+                            binop(Iop_64HLtoV128,
+                                  mkU64(0x8000000000000000),
+                                  mkU64(0x8000000000000000))));
+                     putWReg(wd, /* t3 supplies the low 63 bits, t4 supplies bit 63 */
+                             binop(Iop_OrV128,
+                                   mkexpr(t3), mkexpr(t4)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      default:
+         return -1;
+   }
+
+   return 0;
+}
+
+static Int msa_3R_1C(UInt cins, UChar wd, UChar ws) { /* 3R (0x1C) */
+   IRTemp t1, t2, t3, t4, t5, t6;
+   UShort operation;
+   UChar df, wt;
+
+   operation = (cins & 0x03C00000) >> 22;
+   df = (cins & 0x00200000) >> 21;
+   wt = (cins & 0x001F0000) >> 16;
+
+   switch (operation) {
+      case 0x01: { /* FCOR.df */
+            /* wd receives the bitwise complement of the "unordered"
+               comparison of ws and wt, after calculateMSACSR has been
+               invoked with the format-specific operation code. */
+            if (df == 0x00) { /* FCOR.W */
+               DIP("FCOR.W w%d, w%d, w%d", wd, ws, wt);
+               calculateMSACSR(ws, wt, FCORW, 2);
+               putWReg(wd, unop(Iop_NotV128,
+                                binop(Iop_CmpUN32Fx4,
+                                      getWReg(ws), getWReg(wt))));
+            } else if (df == 0x01) { /* FCOR.D */
+               DIP("FCOR.D w%d, w%d, w%d", wd, ws, wt);
+               calculateMSACSR(ws, wt, FCORD, 2);
+               putWReg(wd, unop(Iop_NotV128,
+                                binop(Iop_CmpUN64Fx2,
+                                      getWReg(ws), getWReg(wt))));
+            } else {
+               return -1;
+            }
+
+            break;
+         }
+
+      case 0x02: { /* FCUNE.df */
+            /* wd receives the bitwise complement of the per-lane equality
+               comparison of ws and wt. */
+            if (df == 0x00) { /* FCUNE.W */
+               DIP("FCUNE.W w%d, w%d, w%d", wd, ws, wt);
+               calculateMSACSR(ws, wt, FCUNEW, 2);
+               putWReg(wd, unop(Iop_NotV128,
+                                binop(Iop_CmpEQ32Fx4,
+                                      getWReg(ws), getWReg(wt))));
+            } else if (df == 0x01) { /* FCUNE.D */
+               DIP("FCUNE.D w%d, w%d, w%d", wd, ws, wt);
+               calculateMSACSR(ws, wt, FCUNED, 2);
+               putWReg(wd, unop(Iop_NotV128,
+                                binop(Iop_CmpEQ64Fx2,
+                                      getWReg(ws), getWReg(wt))));
+            } else {
+               return -1;
+            }
+
+            break;
+         }
+
+      case 0x03: { /* FCNE.df */
+            /* wd = NOT(ws == wt) XOR unordered(ws, wt), evaluated per lane
+               at the width selected by df. */
+            if (df == 0x00) { /* FCNE.W */
+               DIP("FCNE.W w%d, w%d, w%d", wd, ws, wt);
+               calculateMSACSR(ws, wt, FCNEW, 2);
+               putWReg(wd,
+                       binop(Iop_XorV128,
+                             unop(Iop_NotV128,
+                                  binop(Iop_CmpEQ32Fx4,
+                                        getWReg(ws), getWReg(wt))),
+                             binop(Iop_CmpUN32Fx4,
+                                   getWReg(ws), getWReg(wt))));
+            } else if (df == 0x01) { /* FCNE.D */
+               DIP("FCNE.D w%d, w%d, w%d", wd, ws, wt);
+               calculateMSACSR(ws, wt, FCNED, 2);
+               putWReg(wd,
+                       binop(Iop_XorV128,
+                             unop(Iop_NotV128,
+                                  binop(Iop_CmpEQ64Fx2,
+                                        getWReg(ws), getWReg(wt))),
+                             binop(Iop_CmpUN64Fx2,
+                                   getWReg(ws), getWReg(wt))));
+            } else {
+               return -1;
+            }
+
+            break;
+         }
+
+      case 0x04: { /* MUL_Q.df */
+            /* Pick the QDMulHi op for the lane width, then run the shared
+               sequence: copy ws and wt into temporaries, combine them and
+               store the result in wd. */
+            IROp mulHi;
+
+            if (df == 0x00) { /* MUL_Q.H */
+               DIP("MUL_Q.H w%d, w%d, w%d", wd, ws, wt);
+               mulHi = Iop_QDMulHi16Sx8;
+            } else if (df == 0x01) { /* MUL_Q.W */
+               DIP("MUL_Q.W w%d, w%d, w%d", wd, ws, wt);
+               mulHi = Iop_QDMulHi32Sx4;
+            } else {
+               return -1;
+            }
+
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            t3 = newTemp(Ity_V128);
+            assign(t1, getWReg(ws));
+            assign(t2, getWReg(wt));
+            assign(t3, binop(mulHi, mkexpr(t1), mkexpr(t2)));
+            putWReg(wd, mkexpr(t3));
+            break;
+         }
+
+      case 0x05: { /* MADD_Q.df */
+            /* Multiply-accumulate into wd.  Even and odd source lanes are
+               widened to double width and handled separately: the wd lane
+               is shifted left by 15 (.H) or 31 (.W), the ws*wt product is
+               added, and both sums are narrowed again with the same shift
+               amount and re-interleaved into wd. */
+            switch (df) {
+               case 0x00: { /* MADD_Q.H */
+                     /* This arm works on 16-bit lanes (16x8 interleave,
+                        32->16 narrowing), so it is the .H form. */
+                     DIP("MADD_Q.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     t5 = newTemp(Ity_V128);
+                     t6 = newTemp(Ity_V128);
+                     assign(t1, // even
+                            binop(Iop_SarN32x4,
+                                  binop(Iop_InterleaveEvenLanes16x8,
+                                        getWReg(ws),
+                                        getWReg(ws)),
+                                  mkU8(16)));
+                     assign(t2, // odd
+                            binop(Iop_SarN32x4,
+                                  getWReg(ws), mkU8(16)));
+                     assign(t3, // even
+                            binop(Iop_SarN32x4,
+                                  binop(Iop_InterleaveEvenLanes16x8,
+                                        getWReg(wt),
+                                        getWReg(wt)),
+                                  mkU8(16)));
+                     assign(t4, // odd
+                            binop(Iop_SarN32x4,
+                                  getWReg(wt), mkU8(16)));
+                     /* t5: even lanes, (wd << 15) + ws * wt */
+                     assign(t5,
+                         binop(Iop_Add32x4,
+                            binop(Iop_ShlN32x4,
+                                  binop(Iop_SarN32x4,
+                                     binop(Iop_InterleaveEvenLanes16x8,
+                                           getWReg(wd),
+                                           getWReg(wd)),
+                                     mkU8(16)),
+                                  mkU8(15)),
+                            binop(Iop_Mul32x4,
+                                  mkexpr(t1), mkexpr(t3))));
+                     /* t6: odd lanes, (wd << 15) + ws * wt */
+                     assign(t6,
+                            binop(Iop_Add32x4,
+                                  binop(Iop_ShlN32x4,
+                                        binop(Iop_SarN32x4,
+                                              getWReg(wd),
+                                              mkU8(16)),
+                                        mkU8(15)),
+                                  binop(Iop_Mul32x4,
+                                        mkexpr(t2), mkexpr(t4))));
+                     putWReg(wd,
+                             binop(Iop_InterleaveEvenLanes16x8,
+                                   binop(Iop_QandQSarNnarrow32Sto16Sx4,
+                                         mkexpr(t6), mkU8(15)),
+                                   binop(Iop_QandQSarNnarrow32Sto16Sx4,
+                                         mkexpr(t5), mkU8(15))));
+                     break;
+                  }
+
+               case 0x01: { /* MADD_Q.W */
+                     DIP("MADD_Q.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     t5 = newTemp(Ity_V128);
+                     t6 = newTemp(Ity_V128);
+                     assign(t1, // even
+                            binop(Iop_SarN64x2,
+                                  binop(Iop_InterleaveEvenLanes32x4,
+                                        getWReg(ws),
+                                        getWReg(ws)),
+                                  mkU8(32)));
+                     assign(t2, // odd
+                            binop(Iop_SarN64x2,
+                                  getWReg(ws), mkU8(32)));
+                     assign(t3, // even
+                            binop(Iop_SarN64x2,
+                                  binop(Iop_InterleaveEvenLanes32x4,
+                                        getWReg(wt),
+                                        getWReg(wt)),
+                                  mkU8(32)));
+                     assign(t4, // odd
+                            binop(Iop_SarN64x2,
+                                  getWReg(wt), mkU8(32)));
+                     /* t5: even lanes, (wd << 31) + ws * wt; the product is
+                        formed one 64-bit half at a time with Iop_Mul64. */
+                     assign(t5,
+                         binop(Iop_Add64x2,
+                            binop(Iop_ShlN64x2,
+                                  binop(Iop_SarN64x2,
+                                     binop(Iop_InterleaveEvenLanes32x4,
+                                           getWReg(wd),
+                                           getWReg(wd)),
+                                     mkU8(32)),
+                                  mkU8(31)),
+                            binop(Iop_64HLtoV128,
+                                  binop(Iop_Mul64,
+                                        unop(Iop_V128HIto64,
+                                             mkexpr(t1)),
+                                        unop(Iop_V128HIto64,
+                                             mkexpr(t3))),
+                                  binop(Iop_Mul64,
+                                        unop(Iop_V128to64,
+                                             mkexpr(t1)),
+                                        unop(Iop_V128to64,
+                                             mkexpr(t3))))));
+                     /* t6: odd lanes, same computation as t5. */
+                     assign(t6,
+                            binop(Iop_Add64x2,
+                                  binop(Iop_ShlN64x2,
+                                        binop(Iop_SarN64x2,
+                                              getWReg(wd),
+                                              mkU8(32)),
+                                        mkU8(31)),
+                                  binop(Iop_64HLtoV128,
+                                        binop(Iop_Mul64,
+                                              unop(Iop_V128HIto64,
+                                                   mkexpr(t2)),
+                                              unop(Iop_V128HIto64,
+                                                   mkexpr(t4))),
+                                        binop(Iop_Mul64,
+                                              unop(Iop_V128to64,
+                                                   mkexpr(t2)),
+                                              unop(Iop_V128to64,
+                                                   mkexpr(t4))))));
+                     putWReg(wd,
+                             binop(Iop_InterleaveEvenLanes32x4,
+                                   binop(Iop_QandQSarNnarrow64Sto32Sx2,
+                                         mkexpr(t6), mkU8(31)),
+                                   binop(Iop_QandQSarNnarrow64Sto32Sx2,
+                                         mkexpr(t5), mkU8(31))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x06: { /* MSUB_Q.df: subtract the ws*wt products from wd */
+            switch (df) {
+               case 0x00: { /* MSUB_Q.H */
+                     DIP("MSUB_Q.H w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     t5 = newTemp(Ity_V128);
+                     t6 = newTemp(Ity_V128);
+                     assign(t1, // even
+                            binop(Iop_SarN32x4,
+                                  binop(Iop_InterleaveEvenLanes16x8,
+                                        getWReg(ws),
+                                        getWReg(ws)),
+                                  mkU8(16)));
+                     assign(t2, // odd
+                            binop(Iop_SarN32x4,
+                                  getWReg(ws), mkU8(16)));
+                     assign(t3, // even
+                            binop(Iop_SarN32x4,
+                                  binop(Iop_InterleaveEvenLanes16x8,
+                                        getWReg(wt),
+                                        getWReg(wt)),
+                                  mkU8(16)));
+                     assign(t4, // odd
+                            binop(Iop_SarN32x4,
+                                  getWReg(wt), mkU8(16)));
+                     assign(t5, /* even lanes: (wd << 15) - ws * wt */
+                         binop(Iop_Sub32x4,
+                            binop(Iop_ShlN32x4,
+                               binop(Iop_SarN32x4,
+                                     binop(Iop_InterleaveEvenLanes16x8,
+                                           getWReg(wd),
+                                           getWReg(wd)),
+                                     mkU8(16)),
+                               mkU8(15)),
+                            binop(Iop_Mul32x4,
+                                  mkexpr(t1), mkexpr(t3))));
+                     assign(t6, /* odd lanes: (wd << 15) - ws * wt */
+                            binop(Iop_Sub32x4,
+                                  binop(Iop_ShlN32x4,
+                                        binop(Iop_SarN32x4,
+                                              getWReg(wd),
+                                              mkU8(16)),
+                                        mkU8(15)),
+                                  binop(Iop_Mul32x4,
+                                        mkexpr(t2), mkexpr(t4))));
+                     putWReg(wd, /* narrow t6 and t5 with shift 15, then re-interleave */
+                             binop(Iop_InterleaveEvenLanes16x8,
+                                   binop(Iop_QandQSarNnarrow32Sto16Sx4,
+                                         mkexpr(t6), mkU8(15)),
+                                   binop(Iop_QandQSarNnarrow32Sto16Sx4,
+                                         mkexpr(t5), mkU8(15))));
+                     break;
+                  }
+
+               case 0x01: { /* MSUB_Q.W */
+                     DIP("MSUB_Q.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     t5 = newTemp(Ity_V128);
+                     t6 = newTemp(Ity_V128);
+                     assign(t1, // even
+                            binop(Iop_SarN64x2,
+                                  binop(Iop_InterleaveEvenLanes32x4,
+                                        getWReg(ws),
+                                        getWReg(ws)),
+                                  mkU8(32)));
+                     assign(t2, // odd
+                            binop(Iop_SarN64x2,
+                                  getWReg(ws), mkU8(32)));
+                     assign(t3, // even
+                            binop(Iop_SarN64x2,
+                                  binop(Iop_InterleaveEvenLanes32x4,
+                                        getWReg(wt),
+                                        getWReg(wt)),
+                                  mkU8(32)));
+                     assign(t4, // odd
+                            binop(Iop_SarN64x2,
+                                  getWReg(wt), mkU8(32)));
+                     assign(t5, /* even lanes: (wd << 31) - ws * wt, product built per 64-bit half */
+                         binop(Iop_Sub64x2,
+                               binop(Iop_ShlN64x2,
+                                  binop(Iop_SarN64x2,
+                                     binop(Iop_InterleaveEvenLanes32x4,
+                                           getWReg(wd),
+                                           getWReg(wd)),
+                                     mkU8(32)),
+                                  mkU8(31)),
+                               binop(Iop_64HLtoV128,
+                                     binop(Iop_Mul64,
+                                           unop(Iop_V128HIto64,
+                                                mkexpr(t1)),
+                                           unop(Iop_V128HIto64,
+                                                mkexpr(t3))),
+                                     binop(Iop_Mul64,
+                                           unop(Iop_V128to64,
+                                                mkexpr(t1)),
+                                           unop(Iop_V128to64,
+                                                mkexpr(t3))))));
+                     assign(t6, /* odd lanes: (wd << 31) - ws * wt, product built per 64-bit half */
+                            binop(Iop_Sub64x2,
+                                  binop(Iop_ShlN64x2,
+                                        binop(Iop_SarN64x2,
+                                              getWReg(wd),
+                                              mkU8(32)),
+                                        mkU8(31)),
+                                  binop(Iop_64HLtoV128,
+                                        binop(Iop_Mul64,
+                                              unop(Iop_V128HIto64,
+                                                   mkexpr(t2)),
+                                              unop(Iop_V128HIto64,
+                                                   mkexpr(t4))),
+                                        binop(Iop_Mul64,
+                                              unop(Iop_V128to64,
+                                                   mkexpr(t2)),
+                                              unop(Iop_V128to64,
+                                                   mkexpr(t4))))));
+                     putWReg(wd, /* narrow t6 and t5 with shift 31, then re-interleave */
+                             binop(Iop_InterleaveEvenLanes32x4,
+                                   binop(Iop_QandQSarNnarrow64Sto32Sx2,
+                                         mkexpr(t6), mkU8(31)),
+                                   binop(Iop_QandQSarNnarrow64Sto32Sx2,
+                                         mkexpr(t5), mkU8(31))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x09: { /* FSOR.df */
+            /* wd receives the bitwise complement of the "unordered"
+               comparison of ws and wt; calculateMSACSR is invoked first
+               with the FSOR operation code for the chosen format. */
+            if (df == 0x00) { /* FSOR.W */
+               DIP("FSOR.W w%d, w%d, w%d", wd, ws, wt);
+               calculateMSACSR(ws, wt, FSORW, 2);
+               putWReg(wd, unop(Iop_NotV128,
+                                binop(Iop_CmpUN32Fx4,
+                                      getWReg(ws), getWReg(wt))));
+            } else if (df == 0x01) { /* FSOR.D */
+               DIP("FSOR.D w%d, w%d, w%d", wd, ws, wt);
+               calculateMSACSR(ws, wt, FSORD, 2);
+               putWReg(wd, unop(Iop_NotV128,
+                                binop(Iop_CmpUN64Fx2,
+                                      getWReg(ws), getWReg(wt))));
+            } else {
+               return -1;
+            }
+
+            break;
+         }
+
+      case 0x0A: { /* FSUNE.df */
+            /* wd receives the bitwise complement of the per-lane equality
+               comparison of ws and wt; calculateMSACSR is invoked first
+               with the FSUNE operation code for the chosen format. */
+            if (df == 0x00) { /* FSUNE.W */
+               DIP("FSUNE.W w%d, w%d, w%d", wd, ws, wt);
+               calculateMSACSR(ws, wt, FSUNEW, 2);
+               putWReg(wd, unop(Iop_NotV128,
+                                binop(Iop_CmpEQ32Fx4,
+                                      getWReg(ws), getWReg(wt))));
+            } else if (df == 0x01) { /* FSUNE.D */
+               DIP("FSUNE.D w%d, w%d, w%d", wd, ws, wt);
+               calculateMSACSR(ws, wt, FSUNED, 2);
+               putWReg(wd, unop(Iop_NotV128,
+                                binop(Iop_CmpEQ64Fx2,
+                                      getWReg(ws), getWReg(wt))));
+            } else {
+               return -1;
+            }
+
+            break;
+         }
+
+      case 0x0B: { /* FSNE.df */
+            /* wd = NOT(ws == wt) XOR unordered(ws, wt), evaluated per lane
+               at the width selected by df. */
+            if (df == 0x00) { /* FSNE.W */
+               DIP("FSNE.W w%d, w%d, w%d", wd, ws, wt);
+               calculateMSACSR(ws, wt, FSNEW, 2);
+               putWReg(wd,
+                       binop(Iop_XorV128,
+                             unop(Iop_NotV128,
+                                  binop(Iop_CmpEQ32Fx4,
+                                        getWReg(ws), getWReg(wt))),
+                             binop(Iop_CmpUN32Fx4,
+                                   getWReg(ws), getWReg(wt))));
+            } else if (df == 0x01) { /* FSNE.D */
+               DIP("FSNE.D w%d, w%d, w%d", wd, ws, wt);
+               calculateMSACSR(ws, wt, FSNED, 2);
+               putWReg(wd,
+                       binop(Iop_XorV128,
+                             unop(Iop_NotV128,
+                                  binop(Iop_CmpEQ64Fx2,
+                                        getWReg(ws), getWReg(wt))),
+                             binop(Iop_CmpUN64Fx2,
+                                   getWReg(ws), getWReg(wt))));
+            } else {
+               return -1;
+            }
+
+            break;
+         }
+
+      case 0x0C: { /* MULR_Q.df */
+            /* Pick the QRDMulHi op for the lane width, then run the shared
+               sequence: copy ws and wt into temporaries, combine them and
+               store the result in wd. */
+            IROp mulHiRnd;
+
+            if (df == 0x00) { /* MULR_Q.H */
+               DIP("MULR_Q.H w%d, w%d, w%d", wd, ws, wt);
+               mulHiRnd = Iop_QRDMulHi16Sx8;
+            } else if (df == 0x01) { /* MULR_Q.W */
+               DIP("MULR_Q.W w%d, w%d, w%d", wd, ws, wt);
+               mulHiRnd = Iop_QRDMulHi32Sx4;
+            } else {
+               return -1;
+            }
+
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            t3 = newTemp(Ity_V128);
+            assign(t1, getWReg(ws));
+            assign(t2, getWReg(wt));
+            assign(t3, binop(mulHiRnd, mkexpr(t1), mkexpr(t2)));
+            putWReg(wd, mkexpr(t3));
+            break;
+         }
+
+      case 0x0D: { /* MADDR_Q.df */
+            switch (df) {
+               case 0x00: { /* MADDR_Q.W */
+                     DIP("MADDR_Q.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     t5 = newTemp(Ity_V128);
+                     t6 = newTemp(Ity_V128);
+                     assign(t1, // even
+                            binop(Iop_SarN32x4,
+                                  binop(Iop_InterleaveEvenLanes16x8,
+                                        getWReg(ws),
+                                        getWReg(ws)),
+                                  mkU8(16)));
+                     assign(t2, // odd
+                            binop(Iop_SarN32x4,
+                                  getWReg(ws), mkU8(16)));
+                     assign(t3, // even
+                            binop(Iop_SarN32x4,
+                                  binop(Iop_InterleaveEvenLanes16x8,
+                                        getWReg(wt),
+                                        getWReg(wt)),
+                                  mkU8(16)));
+                     assign(t4, // odd
+                            binop(Iop_SarN32x4,
+                                  getWReg(wt), mkU8(16)));
+                     assign(t5,
+                         binop(Iop_Add32x4,
+                            binop(Iop_ShlN32x4,
+                               binop(Iop_SarN32x4,
+                                     binop(Iop_InterleaveEvenLanes16x8,
+                                           getWReg(wd),
+                                           getWReg(wd)),
+                                     mkU8(16)),
+                               mkU8(15)),
+                            binop(Iop_Mul32x4,
+                                  mkexpr(t1), mkexpr(t3))));
+                     assign(t6,
+                            binop(Iop_Add32x4,
+                                  binop(Iop_ShlN32x4,
+                                        binop(Iop_SarN32x4,
+                                              getWReg(wd),
+                                              mkU8(16)),
+                                        mkU8(15)),
+                                  binop(Iop_Mul32x4,
+                                        mkexpr(t2), mkexpr(t4))));
+                     putWReg(wd,
+                          binop(Iop_InterleaveEvenLanes16x8,
+                                binop(Iop_QandQRSarNnarrow32Sto16Sx4,
+                                      mkexpr(t6), mkU8(15)),
+                                binop(Iop_QandQRSarNnarrow32Sto16Sx4,
+                                      mkexpr(t5), mkU8(15))));
+                     break;
+                  }
+
+               case 0x01: { /* MADDR_Q.D */
+                     DIP("MADDR_Q.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     t5 = newTemp(Ity_V128);
+                     t6 = newTemp(Ity_V128);
+                     assign(t1, // even
+                            binop(Iop_SarN64x2,
+                                  binop(Iop_InterleaveEvenLanes32x4,
+                                        getWReg(ws),
+                                        getWReg(ws)),
+                                  mkU8(32)));
+                     assign(t2, // odd
+                            binop(Iop_SarN64x2,
+                                  getWReg(ws), mkU8(32)));
+                     assign(t3, // even
+                            binop(Iop_SarN64x2,
+                                  binop(Iop_InterleaveEvenLanes32x4,
+                                        getWReg(wt),
+                                        getWReg(wt)),
+                                  mkU8(32)));
+                     assign(t4, // odd
+                            binop(Iop_SarN64x2,
+                                  getWReg(wt), mkU8(32)));
+                     assign(t5,
+                         binop(Iop_Add64x2,
+                            binop(Iop_ShlN64x2,
+                               binop(Iop_SarN64x2,
+                                  binop(Iop_InterleaveEvenLanes32x4,
+                                        getWReg(wd),
+                                        getWReg(wd)),
+                                  mkU8(32)),
+                               mkU8(31)),
+                            binop(Iop_64HLtoV128,
+                                  binop(Iop_Mul64,
+                                        unop(Iop_V128HIto64,
+                                             mkexpr(t1)),
+                                        unop(Iop_V128HIto64,
+                                             mkexpr(t3))),
+                                  binop(Iop_Mul64,
+                                        unop(Iop_V128to64,
+                                             mkexpr(t1)),
+                                        unop(Iop_V128to64,
+                                             mkexpr(t3))))));
+                     assign(t6,
+                            binop(Iop_Add64x2,
+                                  binop(Iop_ShlN64x2,
+                                        binop(Iop_SarN64x2,
+                                              getWReg(wd),
+                                              mkU8(32)),
+                                        mkU8(31)),
+                                  binop(Iop_64HLtoV128,
+                                        binop(Iop_Mul64,
+                                              unop(Iop_V128HIto64,
+                                                   mkexpr(t2)),
+                                              unop(Iop_V128HIto64,
+                                                   mkexpr(t4))),
+                                        binop(Iop_Mul64,
+                                              unop(Iop_V128to64,
+                                                   mkexpr(t2)),
+                                              unop(Iop_V128to64,
+                                                   mkexpr(t4))))));
+                     putWReg(wd,
+                          binop(Iop_InterleaveEvenLanes32x4,
+                                binop(Iop_QandQRSarNnarrow64Sto32Sx2,
+                                      mkexpr(t6), mkU8(31)),
+                                binop(Iop_QandQRSarNnarrow64Sto32Sx2,
+                                      mkexpr(t5), mkU8(31))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x0E: { /* MSUBR_Q.df */
+            switch (df) {
+               case 0x00: { /* MSUBR_Q.W */
+                     DIP("MSUBR_Q.W w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     t5 = newTemp(Ity_V128);
+                     t6 = newTemp(Ity_V128);
+                     assign(t1, // even
+                            binop(Iop_SarN32x4,
+                                  binop(Iop_InterleaveEvenLanes16x8,
+                                        getWReg(ws),
+                                        getWReg(ws)),
+                                  mkU8(16)));
+                     assign(t2, // odd
+                            binop(Iop_SarN32x4,
+                                  getWReg(ws), mkU8(16)));
+                     assign(t3, // even
+                            binop(Iop_SarN32x4,
+                                  binop(Iop_InterleaveEvenLanes16x8,
+                                        getWReg(wt),
+                                        getWReg(wt)),
+                                  mkU8(16)));
+                     assign(t4, // odd
+                            binop(Iop_SarN32x4,
+                                  getWReg(wt), mkU8(16)));
+                     assign(t5,
+                         binop(Iop_Sub32x4,
+                            binop(Iop_ShlN32x4,
+                                  binop(Iop_SarN32x4,
+                                     binop(Iop_InterleaveEvenLanes16x8,
+                                           getWReg(wd),
+                                           getWReg(wd)),
+                                     mkU8(16)),
+                                  mkU8(15)),
+                            binop(Iop_Mul32x4,
+                                  mkexpr(t1), mkexpr(t3))));
+                     assign(t6,
+                            binop(Iop_Sub32x4,
+                                  binop(Iop_ShlN32x4,
+                                        binop(Iop_SarN32x4,
+                                              getWReg(wd),
+                                              mkU8(16)),
+                                        mkU8(15)),
+                                  binop(Iop_Mul32x4,
+                                        mkexpr(t2), mkexpr(t4))));
+                     putWReg(wd,
+                          binop(Iop_InterleaveEvenLanes16x8,
+                                binop(Iop_QandQRSarNnarrow32Sto16Sx4,
+                                      mkexpr(t6), mkU8(15)),
+                                binop(Iop_QandQRSarNnarrow32Sto16Sx4,
+                                      mkexpr(t5), mkU8(15))));
+                     break;
+                  }
+
+               case 0x01: { /* MSUBR_Q.D */
+                     DIP("MSUBR_Q.D w%d, w%d, w%d", wd, ws, wt);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     t5 = newTemp(Ity_V128);
+                     t6 = newTemp(Ity_V128);
+                     assign(t1, // even
+                            binop(Iop_SarN64x2,
+                                  binop(Iop_InterleaveEvenLanes32x4,
+                                        getWReg(ws),
+                                        getWReg(ws)),
+                                  mkU8(32)));
+                     assign(t2, // odd
+                            binop(Iop_SarN64x2,
+                                  getWReg(ws), mkU8(32)));
+                     assign(t3, // even
+                            binop(Iop_SarN64x2,
+                                  binop(Iop_InterleaveEvenLanes32x4,
+                                        getWReg(wt),
+                                        getWReg(wt)),
+                                  mkU8(32)));
+                     assign(t4, // odd
+                            binop(Iop_SarN64x2,
+                                  getWReg(wt), mkU8(32)));
+                     assign(t5,
+                         binop(Iop_Sub64x2,
+                            binop(Iop_ShlN64x2,
+                               binop(Iop_SarN64x2,
+                                  binop(Iop_InterleaveEvenLanes32x4,
+                                        getWReg(wd),
+                                        getWReg(wd)),
+                                  mkU8(32)),
+                               mkU8(31)),
+                            binop(Iop_64HLtoV128,
+                                  binop(Iop_Mul64,
+                                        unop(Iop_V128HIto64,
+                                             mkexpr(t1)),
+                                        unop(Iop_V128HIto64,
+                                             mkexpr(t3))),
+                                  binop(Iop_Mul64,
+                                        unop(Iop_V128to64,
+                                             mkexpr(t1)),
+                                        unop(Iop_V128to64,
+                                             mkexpr(t3))))));
+                  assign(t6,
+                            binop(Iop_Sub64x2,
+                                  binop(Iop_ShlN64x2,
+                                        binop(Iop_SarN64x2,
+                                              getWReg(wd),
+                                              mkU8(32)),
+                                        mkU8(31)),
+                                  binop(Iop_64HLtoV128,
+                                        binop(Iop_Mul64,
+                                              unop(Iop_V128HIto64,
+                                                   mkexpr(t2)),
+                                              unop(Iop_V128HIto64,
+                                                   mkexpr(t4))),
+                                        binop(Iop_Mul64,
+                                              unop(Iop_V128to64,
+                                                   mkexpr(t2)),
+                                              unop(Iop_V128to64,
+                                                   mkexpr(t4))))));
+                     putWReg(wd,
+                          binop(Iop_InterleaveEvenLanes32x4,
+                                binop(Iop_QandQRSarNnarrow64Sto32Sx2,
+                                      mkexpr(t6), mkU8(31)),
+                                binop(Iop_QandQRSarNnarrow64Sto32Sx2,
+                                      mkexpr(t5), mkU8(31))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      default:
+         return -1;
+   }
+
+   return 0;
+}
+
+static Int msa_ELM(UInt cins, UChar wd, UChar ws) { /* ELM (0x19) */
+   IRTemp t1, t2, t3, t4, t5;
+   IRType ty;
+   UShort operation;
+   UChar df, n;
+
+   operation = (cins & 0x03C00000) >> 22;
+   ty = mode64 ? Ity_I64 : Ity_I32;
+
+   switch ((cins & 0x03FF0000) >> 16) {
+      case 0x07E: /* CFCMSA */
+         DIP("CFCMSA r%d, c%d", wd, ws);
+
+         switch (ws) {
+            case 0: { /* MSAIR */
+                  IRDirty *d;
+                  t1 = newTemp(Ity_I32);
+                  /* IRExpr_BBPTR() =>
+                                       Need to pass pointer to
+                                             guest state to helper. */
+                  d = unsafeIRDirty_1_N(t1, 0,
+                                        "mips_dirtyhelper_get_MSAIR",
+                                        &mips_dirtyhelper_get_MSAIR,
+                                        mkIRExprVec_0());
+                  /* d->nFxState = 0; */
+                  stmt(IRStmt_Dirty(d));
+                  putIReg(wd,
+                          mkWidenFrom32(ty, mkexpr(t1), True));
+                  break;
+               }
+
+            case 1: /* MSACSR */
+               putIReg(wd,
+                       mkWidenFrom32(ty, getMSACSR(), True));
+               break;
+
+            default:
+               putIReg(wd,
+                       mkWidenFrom32(ty, mkU32(0), False));
+               break;
+         }
+
+         break;
+
+      case 0x03E: /* CTCMSA */
+         DIP("CTCMSA r%d, c%d", ws, wd);
+
+         if (wd == 1) { /* MSACSR */
+            putMSACSR(
+               binop(Iop_And32, mkNarrowTo32(ty, getIReg(ws)),
+                     mkU32(0x1FFFFFF)));
+         }
+
+         break;
+
+      case 0x0BE: /* MOVE.V */
+         DIP("MOVE.V w%d, w%d", ws, wd);
+         putWReg(wd, getWReg(ws));
+         break;
+
+      default:
+         df = (cins & 0x003F0000) >> 16;
+         if ((df & 0x38) == 0x38) {        // 11100n; dw
+            n = df & 0x01;
+            df = 0x38;
+         } else if ((df & 0x30) == 0x30) { // 1100nn; w
+            n = df & 0x03;
+            df = 0x30;
+         } else if ((df & 0x20) == 0x20) { // 100nnn; hw
+            n = df & 0x07;
+            df = 0x20;
+         } else if ((df & 0x00) == 0x00) { // 00nnnn; b
+            n = df & 0x0F;
+            df = 0x00;
+         }
+
+         switch (operation) {
+            case 0x00: /* SLDI.df */
+               switch (df) {
+                  case 0x00: /* SLDI.B */
+                     DIP("SLDI.B w%d, w%d[%d]", wd, ws, n);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_V128);
+                     assign(t1,
+                            binop(Iop_ShrV128,
+                                  getWReg(ws),
+                                  mkU8(n << 3)));
+                     assign(t2,
+                            binop(Iop_ShlV128,
+                                  getWReg(wd),
+                                  mkU8(n ?
+                                       (16 - n) << 3 : 0)));
+                     putWReg(wd,
+                             binop(Iop_OrV128, mkexpr(t1), mkexpr(t2)));
+                     break;
+
+                  case 0x20: /* SLDI.H */
+                     DIP("SLDI.H w%d, w%d[%d]", wd, ws, n);
+
+                     if (n == 0) {
+                        putWReg(wd, getWReg(ws));
+                     } else {
+                        t1 = newTemp(Ity_V128);
+                        t2 = newTemp(Ity_V128);
+                        assign(t1,
+                               binop(Iop_ShrN64x2,
+                                     getWReg(ws),
+                                     mkU8(n << 3)));
+                        assign(t2,
+                               binop(Iop_ShlN64x2,
+                                     getWReg(wd),
+                                     mkU8((8 - n) << 3)));
+                        putWReg(wd,
+                                binop(Iop_OrV128,
+                                      mkexpr(t1),
+                                      mkexpr(t2)));
+                     }
+
+                     break;
+
+                  case 0x30: /* SLDI.W */
+                     DIP("SLDI.W w%d, w%d[%d]", wd, ws, n);
+
+                     if (n == 0) {
+                        putWReg(wd, getWReg(ws));
+                     } else {
+                        t1 = newTemp(Ity_V128);
+                        t2 = newTemp(Ity_V128);
+                        assign(t1,
+                               binop(Iop_ShrN32x4,
+                                     getWReg(ws),
+                                     mkU8(n << 3)));
+                        assign(t2,
+                               binop(Iop_ShlN32x4,
+                                     getWReg(wd),
+                                     mkU8((4 - n) << 3)));
+                        putWReg(wd,
+                                binop(Iop_OrV128,
+                                      mkexpr(t1),
+                                      mkexpr(t2)));
+                     }
+
+                     break;
+
+                  case 0x38:  /* SLDI.D */
+                     DIP("SLDI.D w%d, w%d[%d]", wd, ws, n);
+
+                     if (n == 0) {
+                        putWReg(wd, getWReg(ws));
+                     } else {
+                        t1 = newTemp(Ity_V128);
+                        t2 = newTemp(Ity_V128);
+                        assign(t1,
+                               binop(Iop_ShrN16x8,
+                                     getWReg(ws),
+                                     mkU8(n << 3)));
+                        assign(t2,
+                               binop(Iop_ShlN16x8,
+                                     getWReg(wd),
+                                     mkU8((2 - n) << 3)));
+                        putWReg(wd,
+                                binop(Iop_OrV128,
+                                      mkexpr(t1),
+                                      mkexpr(t2)));
+                     }
+
+                     break;
+
+                  default:
+                     return -1;
+               }
+
+               break;
+
+            case 0x01: /* SPLATI.df */
+               switch (df) {
+                  case 0x00: { /* SPLATI.B */
+                        DIP("SPLATI.B w%d, w%d[%d]", wd, ws, n);
+                        t1 = newTemp(Ity_V128);
+                        t2 = newTemp(Ity_V128);
+                        t3 = newTemp(Ity_V128);
+                        t4 = newTemp(Ity_V128);
+
+                        if (n & 1)
+                           assign(t1,
+                                  binop(Iop_InterleaveOddLanes8x16,
+                                        getWReg(ws),
+                                        getWReg(ws)));
+                        else
+                           assign(t1,
+                                  binop(Iop_InterleaveEvenLanes8x16,
+                                        getWReg(ws),
+                                        getWReg(ws)));
+
+                        n /= 2;
+
+                        if (n & 1)
+                           assign(t2,
+                                  binop(Iop_InterleaveOddLanes16x8,
+                                        mkexpr(t1), mkexpr(t1)));
+                        else
+                           assign(t2,
+                                  binop(Iop_InterleaveEvenLanes16x8,
+                                        mkexpr(t1), mkexpr(t1)));
+
+                        n /= 2;
+
+                        if (n & 1)
+                           assign(t3,
+                                  binop(Iop_InterleaveOddLanes32x4,
+                                        mkexpr(t2), mkexpr(t2)));
+                        else
+                           assign(t3,
+                                  binop(Iop_InterleaveEvenLanes32x4,
+                                        mkexpr(t2), mkexpr(t2)));
+
+                        n /= 2;
+
+                        if (n & 1)
+                           assign(t4,
+                                  binop(Iop_InterleaveHI64x2,
+                                        mkexpr(t3), mkexpr(t3)));
+                        else
+                           assign(t4,
+                                  binop(Iop_InterleaveLO64x2,
+                                        mkexpr(t3), mkexpr(t3)));
+
+                        putWReg(wd, mkexpr(t4));
+                        break;
+                     }
+
+                  case 0x20: { /* SPLATI.H */
+                        DIP("SPLATI.H w%d, w%d[%d]", wd, ws, n);
+                        t1 = newTemp(Ity_V128);
+                        t2 = newTemp(Ity_V128);
+                        t3 = newTemp(Ity_V128);
+
+                        if (n & 1)
+                           assign(t1,
+                                  binop(Iop_InterleaveOddLanes16x8,
+                                        getWReg(ws),
+                                        getWReg(ws)));
+                        else
+                           assign(t1,
+                                  binop(Iop_InterleaveEvenLanes16x8,
+                                        getWReg(ws),
+                                        getWReg(ws)));
+
+                        n /= 2;
+
+                        if (n & 1)
+                           assign(t2,
+                                  binop(Iop_InterleaveOddLanes32x4,
+                                        mkexpr(t1), mkexpr(t1)));
+                        else
+                           assign(t2,
+                                  binop(Iop_InterleaveEvenLanes32x4,
+                                        mkexpr(t1), mkexpr(t1)));
+
+                        n /= 2;
+
+                        if (n & 1)
+                           assign(t3,
+                                  binop(Iop_InterleaveHI64x2,
+                                        mkexpr(t2), mkexpr(t2)));
+                        else
+                           assign(t3,
+                                  binop(Iop_InterleaveLO64x2,
+                                        mkexpr(t2), mkexpr(t2)));
+
+                        putWReg(wd, mkexpr(t3));
+                        break;
+                     }
+
+                  case 0x30: { /* SPLATI.W */
+                        DIP("SPLATI.W w%d, w%d[%d]", wd, ws, n);
+                        t1 = newTemp(Ity_V128);
+                        t2 = newTemp(Ity_V128);
+                        t3 = newTemp(Ity_V128);
+                        assign(t1, getWReg(ws));
+
+                        if (n & 1)
+                           assign(t2,
+                                  binop(Iop_InterleaveOddLanes32x4,
+                                        mkexpr(t1), mkexpr(t1)));
+                        else
+                           assign(t2,
+                                  binop(Iop_InterleaveEvenLanes32x4,
+                                        mkexpr(t1), mkexpr(t1)));
+
+                        n /= 2;
+
+                        if (n & 1)
+                           assign(t3,
+                                  binop(Iop_InterleaveHI64x2,
+                                        mkexpr(t2), mkexpr(t2)));
+                        else
+                           assign(t3,
+                                  binop(Iop_InterleaveLO64x2,
+                                        mkexpr(t2), mkexpr(t2)));
+
+                        putWReg(wd, mkexpr(t3));
+                        break;
+                     }
+
+                  case 0x38: /* SPLATI.D */
+                     DIP("SPLATI.D w%d, w%d[%d]", wd, ws, n);
+                     t1 = newTemp(Ity_V128);
+                     t3 = newTemp(Ity_V128);
+                     assign(t1, getWReg(ws));
+
+                     if (n)
+                        assign(t3,
+                               binop(Iop_InterleaveHI64x2,
+                                     mkexpr(t1), mkexpr(t1)));
+                     else
+                        assign(t3,
+                               binop(Iop_InterleaveLO64x2,
+                                     mkexpr(t1), mkexpr(t1)));
+
+                     putWReg(wd, mkexpr(t3));
+                     break;
+
+                  default:
+                     return -1;
+               }
+
+               break;
+
+            case 0x02: /* COPY_S.df */
+               switch (df) {
+                  case 0x00: /* COPY_S.B */
+                     DIP("COPY_S.B r%d, w%d[%d]", wd, ws, n);
+                     t1 = newTemp(Ity_I8);
+
+                     switch (n) {
+                        case 0:
+                           assign(t1,
+                                  unop(Iop_32to8,
+                                       unop(Iop_V128to32,
+                                            getWReg(ws))));
+                           break;
+
+                        case 1:
+                           assign(t1,
+                                  unop(Iop_16HIto8,
+                                       unop(Iop_32to16,
+                                            unop(Iop_V128to32,
+                                                 getWReg(ws)))));
+                           break;
+
+                        case 2:
+                           assign(t1,
+                                  unop(Iop_16to8,
+                                       unop(Iop_32HIto16,
+                                            unop(Iop_64to32,
+                                                 unop(Iop_V128to64,
+                                                      getWReg(ws))))));
+                           break;
+
+                        case 3:
+                           assign(t1,
+                                  unop(Iop_16HIto8,
+                                       unop(Iop_32HIto16,
+                                            unop(Iop_64to32,
+                                                 unop(Iop_V128to64,
+                                                      getWReg(ws))))));
+                           break;
+
+                        case 4:
+                           assign(t1,
+                                  unop(Iop_16to8,
+                                       unop(Iop_32to16,
+                                            unop(Iop_64HIto32,
+                                                 unop(Iop_V128to64,
+                                                      getWReg(ws))))));
+                           break;
+
+                        case 5:
+                           assign(t1,
+                                  unop(Iop_16HIto8,
+                                       unop(Iop_32to16,
+                                            unop(Iop_64HIto32,
+                                                 unop(Iop_V128to64,
+                                                      getWReg(ws))))));
+                           break;
+
+                        case 6:
+                           assign(t1,
+                                  unop(Iop_16to8,
+                                       unop(Iop_32HIto16,
+                                            unop(Iop_64HIto32,
+                                                 unop(Iop_V128to64,
+                                                      getWReg(ws))))));
+                           break;
+
+                        case 7:
+                           assign(t1,
+                                  unop(Iop_16HIto8,
+                                       unop(Iop_32HIto16,
+                                            unop(Iop_64HIto32,
+                                                 unop(Iop_V128to64,
+                                                      getWReg(ws))))));
+                           break;
+
+                        case 8:
+                           assign(t1,
+                                  unop(Iop_16to8,
+                                       unop(Iop_32to16,
+                                            unop(Iop_64to32,
+                                                 unop(Iop_V128HIto64,
+                                                      getWReg(ws))))));
+                           break;
+
+                        case 9:
+                           assign(t1,
+                                  unop(Iop_16HIto8,
+                                       unop(Iop_32to16,
+                                            unop(Iop_64to32,
+                                                 unop(Iop_V128HIto64,
+                                                      getWReg(ws))))));
+                           break;
+
+                        case 10:
+                           assign(t1,
+                                  unop(Iop_16to8,
+                                       unop(Iop_32HIto16,
+                                            unop(Iop_64to32,
+                                                 unop(Iop_V128HIto64,
+                                                      getWReg(ws))))));
+                           break;
+
+                        case 11:
+                           assign(t1,
+                                  unop(Iop_16HIto8,
+                                       unop(Iop_32HIto16,
+                                            unop(Iop_64to32,
+                                                 unop(Iop_V128HIto64,
+                                                      getWReg(ws))))));
+                           break;
+
+                        case 12:
+                           assign(t1,
+                                  unop(Iop_16to8,
+                                       unop(Iop_32to16,
+                                            unop(Iop_64HIto32,
+                                                 unop(Iop_V128HIto64,
+                                                      getWReg(ws))))));
+                           break;
+
+                        case 13:
+                           assign(t1,
+                                  unop(Iop_16HIto8,
+                                       unop(Iop_32to16,
+                                            unop(Iop_64HIto32,
+                                                 unop(Iop_V128HIto64,
+                                                      getWReg(ws))))));
+                           break;
+
+                        case 14:
+                           assign(t1,
+                                  unop(Iop_16to8,
+                                       unop(Iop_32HIto16,
+                                            unop(Iop_64HIto32,
+                                                 unop(Iop_V128HIto64,
+                                                      getWReg(ws))))));
+                           break;
+
+                        case 15:
+                           assign(t1,
+                                  unop(Iop_16HIto8,
+                                       unop(Iop_32HIto16,
+                                            unop(Iop_64HIto32,
+                                                 unop(Iop_V128HIto64,
+                                                      getWReg(ws))))));
+                           break;
+                     }
+
+                     putIReg(wd,
+                             unop(mode64 ? Iop_8Sto64 : Iop_8Sto32,
+                                  mkexpr(t1)));
+                     break;
+
+                  case 0x20: /* COPY_S.H */
+                     DIP("COPY_S.H r%d, w%d[%d]", wd, ws, n);
+                     t1 = newTemp(Ity_I16);
+
+                     switch (n) {
+                        case 0:
+                           assign(t1,
+                                  unop(Iop_32to16,
+                                       unop(Iop_64to32,
+                                            unop(Iop_V128to64,
+                                                 getWReg(ws)))));
+                           break;
+
+                        case 1:
+                           assign(t1,
+                                  unop(Iop_32HIto16,
+                                       unop(Iop_64to32,
+                                            unop(Iop_V128to64,
+                                                 getWReg(ws)))));
+                           break;
+
+                        case 2:
+                           assign(t1,
+                                  unop(Iop_32to16,
+                                       unop(Iop_64HIto32,
+                                            unop(Iop_V128to64,
+                                                 getWReg(ws)))));
+                           break;
+
+                        case 3:
+                           assign(t1,
+                                  unop(Iop_32HIto16,
+                                       unop(Iop_64HIto32,
+                                            unop(Iop_V128to64,
+                                                 getWReg(ws)))));
+                           break;
+
+                        case 4:
+                           assign(t1,
+                                  unop(Iop_32to16,
+                                       unop(Iop_64to32,
+                                            unop(Iop_V128HIto64,
+                                                 getWReg(ws)))));
+                           break;
+
+                        case 5:
+                           assign(t1,
+                                  unop(Iop_32HIto16,
+                                       unop(Iop_64to32,
+                                            unop(Iop_V128HIto64,
+                                                 getWReg(ws)))));
+                           break;
+
+                        case 6:
+                           assign(t1,
+                                  unop(Iop_32to16,
+                                       unop(Iop_64HIto32,
+                                            unop(Iop_V128HIto64,
+                                                 getWReg(ws)))));
+                           break;
+
+                        case 7:
+                           assign(t1,
+                                  unop(Iop_32HIto16,
+                                       unop(Iop_64HIto32,
+                                            unop(Iop_V128HIto64,
+                                                 getWReg(ws)))));
+                           break;
+                     }
+
+                     putIReg(wd,
+                             unop(mode64 ? Iop_16Sto64 : Iop_16Sto32,
+                                  mkexpr(t1)));
+                     break;
+
+                  case 0x30: /* COPY_S.W */
+                     DIP("COPY_S.W r%d, w%d[%d]", wd, ws, n);
+
+                     switch (n) {
+                        case 0:
+                           putIReg(wd,
+                                   mkWidenFrom32(ty,
+                                                 unop(Iop_V128to32,
+                                                      getWReg(ws)),
+                                                 True));
+                           break;
+
+                        case 1:
+                           t2 = newTemp(Ity_I64);
+                           assign(t2,
+                                  unop(Iop_V128to64, getWReg(ws)));
+                           putIReg(wd,
+                                   mkWidenFrom32(ty,
+                                                 unop(Iop_64HIto32,
+                                                      mkexpr(t2)),
+                                                 True));
+                           break;
+
+                        case 2:
+                           t2 = newTemp(Ity_I64);
+                           assign(t2,
+                                  unop(Iop_V128HIto64,
+                                       getWReg(ws)));
+                           putIReg(wd,
+                                   mkWidenFrom32(ty,
+                                                 unop(Iop_64to32,
+                                                      mkexpr(t2)),
+                                                 True));
+                           break;
+
+                        case 3:
+                           t2 = newTemp(Ity_I64);
+                           assign(t2,
+                                  unop(Iop_V128HIto64,
+                                       getWReg(ws)));
+                           putIReg(wd,
+                                   mkWidenFrom32(ty,
+                                                 unop(Iop_64HIto32,
+                                                      mkexpr(t2)),
+                                                 True));
+                           break;
+
+                        default:
+                           break;
+                     }
+
+                     break;
+
+                  case 0x38: /* COPY_S.D */
+                     if (mode64) {
+                        DIP("COPY_S.D r%d, w%d[%d]", wd, ws, n);
+
+                        switch (n) {
+                           case 0:
+                              putIReg(wd,
+                                      unop(Iop_V128to64,
+                                           getWReg(ws)));
+                              break;
+
+                           case 1:
+                              putIReg(wd,
+                                      unop(Iop_V128HIto64,
+                                           getWReg(ws)));
+                              break;
+                        }
+                     } else {
+                        return -2;
+                     }
+
+                     break;
+
+                  default:
+                     return -1;
+               }
+
+               break;
+
+            case 0x03: { /* COPY_U.df */
+                  switch (df) {
+                     case 0x00: /* COPY_U.B */
+                        DIP("COPY_U.B r%d, w%d[%d]", wd, ws, n);
+                        t1 = newTemp(Ity_I8);
+
+                        switch (n) {
+                           case 0:
+                              assign(t1,
+                                     unop(Iop_16to8,
+                                          unop(Iop_32to16,
+                                               unop(Iop_64to32,
+                                                    unop(Iop_V128to64,
+                                                         getWReg(ws))))));
+                              break;
+
+                           case 1:
+                              assign(t1,
+                                     unop(Iop_16HIto8,
+                                          unop(Iop_32to16,
+                                               unop(Iop_64to32,
+                                                    unop(Iop_V128to64,
+                                                         getWReg(ws))))));
+                              break;
+
+                           case 2:
+                              assign(t1,
+                                     unop(Iop_16to8,
+                                          unop(Iop_32HIto16,
+                                               unop(Iop_64to32,
+                                                    unop(Iop_V128to64,
+                                                         getWReg(ws))))));
+                              break;
+
+                           case 3:
+                              assign(t1,
+                                     unop(Iop_16HIto8,
+                                          unop(Iop_32HIto16,
+                                               unop(Iop_64to32,
+                                                    unop(Iop_V128to64,
+                                                         getWReg(ws))))));
+                              break;
+
+                           case 4:
+                              assign(t1,
+                                     unop(Iop_16to8,
+                                          unop(Iop_32to16,
+                                               unop(Iop_64HIto32,
+                                                    unop(Iop_V128to64,
+                                                         getWReg(ws))))));
+                              break;
+
+                           case 5:
+                              assign(t1,
+                                     unop(Iop_16HIto8,
+                                          unop(Iop_32to16,
+                                               unop(Iop_64HIto32,
+                                                    unop(Iop_V128to64,
+                                                         getWReg(ws))))));
+                              break;
+
+                           case 6:
+                              assign(t1,
+                                     unop(Iop_16to8,
+                                          unop(Iop_32HIto16,
+                                               unop(Iop_64HIto32,
+                                                    unop(Iop_V128to64,
+                                                         getWReg(ws))))));
+                              break;
+
+                           case 7:
+                              assign(t1,
+                                     unop(Iop_16HIto8,
+                                          unop(Iop_32HIto16,
+                                               unop(Iop_64HIto32,
+                                                    unop(Iop_V128to64,
+                                                         getWReg(ws))))));
+                              break;
+
+                           case 8:
+                              assign(t1,
+                                     unop(Iop_16to8,
+                                          unop(Iop_32to16,
+                                               unop(Iop_64to32,
+                                                    unop(Iop_V128HIto64,
+                                                         getWReg(ws))))));
+                              break;
+
+                           case 9:
+                              assign(t1,
+                                     unop(Iop_16HIto8,
+                                          unop(Iop_32to16,
+                                               unop(Iop_64to32,
+                                                    unop(Iop_V128HIto64,
+                                                         getWReg(ws))))));
+                              break;
+
+                           case 10:
+                              assign(t1,
+                                     unop(Iop_16to8,
+                                          unop(Iop_32HIto16,
+                                               unop(Iop_64to32,
+                                                    unop(Iop_V128HIto64,
+                                                         getWReg(ws))))));
+                              break;
+
+                           case 11:
+                              assign(t1,
+                                     unop(Iop_16HIto8,
+                                          unop(Iop_32HIto16,
+                                               unop(Iop_64to32,
+                                                    unop(Iop_V128HIto64,
+                                                         getWReg(ws))))));
+                              break;
+
+                           case 12:
+                              assign(t1,
+                                     unop(Iop_16to8,
+                                          unop(Iop_32to16,
+                                               unop(Iop_64HIto32,
+                                                    unop(Iop_V128HIto64,
+                                                         getWReg(ws))))));
+                              break;
+
+                           case 13:
+                              assign(t1,
+                                     unop(Iop_16HIto8,
+                                          unop(Iop_32to16,
+                                               unop(Iop_64HIto32,
+                                                    unop(Iop_V128HIto64,
+                                                         getWReg(ws))))));
+                              break;
+
+                           case 14:
+                              assign(t1,
+                                     unop(Iop_16to8,
+                                          unop(Iop_32HIto16,
+                                               unop(Iop_64HIto32,
+                                                    unop(Iop_V128HIto64,
+                                                         getWReg(ws))))));
+                              break;
+
+                           case 15:
+                              assign(t1,
+                                     unop(Iop_16HIto8,
+                                          unop(Iop_32HIto16,
+                                               unop(Iop_64HIto32,
+                                                    unop(Iop_V128HIto64,
+                                                         getWReg(ws))))));
+                              break;
+                        }
+
+                        putIReg(wd,
+                                unop(mode64 ? Iop_8Uto64 : Iop_8Uto32,
+                                     mkexpr(t1)));
+                        break;
+
+                     case 0x20: /* COPY_U.H */
+                        DIP("COPY_U.H r%d, w%d[%d]", wd, ws, n);
+                        t1 = newTemp(Ity_I16);
+
+                        switch (n) {
+                           case 0:
+                              assign(t1,
+                                     unop(Iop_32to16,
+                                          unop(Iop_64to32,
+                                               unop(Iop_V128to64,
+                                                    getWReg(ws)))));
+                              break;
+
+                           case 1:
+                              assign(t1,
+                                     unop(Iop_32HIto16,
+                                          unop(Iop_64to32,
+                                               unop(Iop_V128to64,
+                                                    getWReg(ws)))));
+                              break;
+
+                           case 2:
+                              assign(t1,
+                                     unop(Iop_32to16,
+                                          unop(Iop_64HIto32,
+                                               unop(Iop_V128to64,
+                                                    getWReg(ws)))));
+                              break;
+
+                           case 3:
+                              assign(t1,
+                                     unop(Iop_32HIto16,
+                                          unop(Iop_64HIto32,
+                                               unop(Iop_V128to64,
+                                                    getWReg(ws)))));
+                              break;
+
+                           case 4:
+                              assign(t1,
+                                     unop(Iop_32to16,
+                                          unop(Iop_64to32,
+                                               unop(Iop_V128HIto64,
+                                                    getWReg(ws)))));
+                              break;
+
+                           case 5:
+                              assign(t1,
+                                     unop(Iop_32HIto16,
+                                          unop(Iop_64to32,
+                                               unop(Iop_V128HIto64,
+                                                    getWReg(ws)))));
+                              break;
+
+                           case 6:
+                              assign(t1,
+                                     unop(Iop_32to16,
+                                          unop(Iop_64HIto32,
+                                               unop(Iop_V128HIto64,
+                                                    getWReg(ws)))));
+                              break;
+
+                           case 7:
+                              assign(t1,
+                                     unop(Iop_32HIto16,
+                                          unop(Iop_64HIto32,
+                                               unop(Iop_V128HIto64,
+                                                    getWReg(ws)))));
+                              break;
+                        }
+
+                        putIReg(wd,
+                                unop(mode64 ? Iop_16Uto64 : Iop_16Uto32,
+                                     mkexpr(t1)));
+                        break;
+
+                     case 0x30: /* COPY_U.W */
+                        DIP("COPY_U.W r%d, w%d[%d]", wd, ws, n);
+
+                        switch (n) {
+                           case 0:
+                              putIReg(wd,
+                                      mkWidenFrom32(ty,
+                                                    unop(Iop_V128to32,
+                                                         getWReg(ws)),
+                                                    False));
+                              break;
+
+                           case 1:
+                              t2 = newTemp(Ity_I64);
+                              assign(t2,
+                                     unop(Iop_V128to64,
+                                          getWReg(ws)));
+                              putIReg(wd,
+                                      mkWidenFrom32(ty,
+                                                    unop(Iop_64HIto32,
+                                                         mkexpr(t2)),
+                                                    False));
+                              break;
+
+                           case 2:
+                              t2 = newTemp(Ity_I64);
+                              assign(t2,
+                                     unop(Iop_V128HIto64,
+                                          getWReg(ws)));
+                              putIReg(wd,
+                                      mkWidenFrom32(ty,
+                                                    unop(Iop_64to32,
+                                                         mkexpr(t2)),
+                                                    False));
+                              break;
+
+                           case 3:
+                              t2 = newTemp(Ity_I64);
+                              assign(t2,
+                                     unop(Iop_V128HIto64,
+                                          getWReg(ws)));
+                              putIReg(wd,
+                                      mkWidenFrom32(ty,
+                                                    unop(Iop_64HIto32,
+                                                         mkexpr(t2)),
+                                                    False));
+                              break;
+
+                           default:
+                              break;
+                        }
+
+                        break;
+
+                     default:
+                        return -1;
+                  }
+
+                  break;
+               }
+
+            case 0x04: { /* INSERT.df */
+                  t5 = newTemp(Ity_I64);
+                  UInt hi = 1;
+                  ULong mask;
+                  IRTemp *src, *dst;
+                  assign(t5, mode64 ? getIReg(ws) :
+                         unop(Iop_32Uto64, getIReg(ws)));
+
+                  if (df == 0x38) { /* INSERT.D */
+                     if (mode64) {
+                        DIP("INSERT.D w%d[%d], r%d", wd, n, ws);
+
+                        if (n == 0) {
+                           putWReg(wd,
+                                   binop(Iop_64HLtoV128,
+                                         unop(Iop_V128HIto64,
+                                              getWReg(wd)),
+                                         mkexpr(t5)));
+                        } else {
+                           putWReg(wd,
+                                   binop(Iop_64HLtoV128,
+                                         mkexpr(t5),
+                                         unop(Iop_V128to64,
+                                              getWReg(wd))));
+                        }
+
+                        break;
+                     } else {
+                        return -2;
+                     }
+                  } else {
+                     t1 = newTemp(Ity_I64);
+                     t2 = newTemp(Ity_I64);
+                     assign(t1, unop(Iop_V128to64, getWReg(wd)));
+                     assign(t2, unop(Iop_V128HIto64, getWReg(wd)));
+                  }
+
+                  switch (df) {
+                     case 0x00: /* INSERT.B */
+                        DIP("INSERT.B w%d[%d], r%d", wd, n, ws);
+
+                        if (n >= 8) {
+                           n -= 8;
+                        } else {
+                           hi = 0;
+                        }
+
+                        n <<= 3;
+                        mask = 0xFFull;
+                        break;
+
+                     case 0x20: /* INSERT.H */
+                        DIP("INSERT.H w%d[%d], r%d", wd, n, ws);
+
+                        if (n >= 4) {
+                           n -= 4;
+                        } else {
+                           hi = 0;
+                        }
+
+                        n <<= 4;
+                        mask = 0xFFFFull;
+                        break;
+
+                     case 0x30: /* INSERT.W */
+                        DIP("INSERT.W w%d[%d], r%d", wd, n, ws);
+
+                        if (n >= 2) {
+                           n -= 2;
+                        } else {
+                           hi = 0;
+                        }
+
+                        n <<= 5;
+                        mask = 0xFFFFFFFFull;
+                        break;
+
+                     default:
+                        return -1;
+                  }
+
+                  if (hi) {
+                     t4 = newTemp(Ity_I64);
+                     src = &t2;
+                     dst = &t4;
+                     t3 = t1;
+                  } else {
+                     t3 = newTemp(Ity_I64);
+                     src = &t1;
+                     dst = &t3;
+                     t4 = t2;
+                  }
+
+                  mask <<= n;
+                  assign(*dst,
+                      binop(Iop_Or64,
+                            binop(Iop_And64, mkexpr(*src), mkU64(~mask)),
+                            binop(Iop_And64,
+                                  binop(Iop_Shl64, mkexpr(t5), mkU8(n)),
+                                  mkU64(mask))));
+                  putWReg(wd,
+                          binop(Iop_64HLtoV128, mkexpr(t4), mkexpr(t3)));
+                  break;
+               }
+
+            case 0x05: { /* INSVE.df */
+                  switch (df) {
+                     case 0x00: { /* INSVE.B */
+                           DIP("INSVE.B w%d[%d], w%d[0]", wd, n, ws);
+                           t1 = newTemp(Ity_V128);
+                           t2 = newTemp(Ity_V128);
+                           assign(t1, getWReg(wd));
+                           assign(t2, getWReg(ws));
+                           Int i;
+                           IRTemp tmp[16];
+
+                           for (i = 0; i < 16; i++) {
+                              tmp[i] = newTemp(Ity_I8);
+
+                              if (n == i)
+                                 assign(tmp[i],
+                                        binop(Iop_GetElem8x16,
+                                              mkexpr(t2), mkU8(0x0)));
+                              else
+                                 assign(tmp[i],
+                                        binop(Iop_GetElem8x16,
+                                              mkexpr(t1), mkU8(i)));
+                           }
+
+                           putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_32HLto64,
+                                         binop(Iop_16HLto32,
+                                               binop(Iop_8HLto16,
+                                                     mkexpr(tmp[15]),
+                                                     mkexpr(tmp[14])),
+                                               binop(Iop_8HLto16,
+                                                     mkexpr(tmp[13]),
+                                                     mkexpr(tmp[12]))),
+                                         binop(Iop_16HLto32,
+                                               binop(Iop_8HLto16,
+                                                     mkexpr(tmp[11]),
+                                                     mkexpr(tmp[10])),
+                                               binop(Iop_8HLto16,
+                                                     mkexpr(tmp[9]),
+                                                     mkexpr(tmp[8])))),
+                                   binop(Iop_32HLto64,
+                                         binop(Iop_16HLto32,
+                                               binop(Iop_8HLto16,
+                                                     mkexpr(tmp[7]),
+                                                     mkexpr(tmp[6])),
+                                               binop(Iop_8HLto16,
+                                                     mkexpr(tmp[5]),
+                                                     mkexpr(tmp[4]))),
+                                         binop(Iop_16HLto32,
+                                               binop(Iop_8HLto16,
+                                                     mkexpr(tmp[3]),
+                                                     mkexpr(tmp[2])),
+                                               binop(Iop_8HLto16,
+                                                     mkexpr(tmp[1]),
+                                                     mkexpr(tmp[0]))))));
+                           break;
+                        }
+
+                     case 0x20: { /* INSVE.H */
+                           DIP("INSVE.H w%d[%d], r%d[0]", wd, n, ws);
+                           t1 = newTemp(Ity_V128);
+                           t2 = newTemp(Ity_V128);
+                           assign(t1, getWReg(wd));
+                           assign(t2, getWReg(ws));
+                           Int i;
+                           IRTemp tmp[8];
+
+                           for (i = 0; i < 8; i++) {
+                              tmp[i] = newTemp(Ity_I16);
+
+                              if (n == i)
+                                 assign(tmp[i],
+                                        binop(Iop_GetElem16x8,
+                                              mkexpr(t2), mkU8(0x0)));
+                              else
+                                 assign(tmp[i],
+                                        binop(Iop_GetElem16x8,
+                                              mkexpr(t1), mkU8(i)));
+                           }
+
+                           putWReg(wd,
+                                   binop(Iop_64HLtoV128,
+                                         binop(Iop_32HLto64,
+                                               binop(Iop_16HLto32,
+                                                     mkexpr(tmp[7]),
+                                                     mkexpr(tmp[6])),
+                                               binop(Iop_16HLto32,
+                                                     mkexpr(tmp[5]),
+                                                     mkexpr(tmp[4]))),
+                                         binop(Iop_32HLto64,
+                                               binop(Iop_16HLto32,
+                                                     mkexpr(tmp[3]),
+                                                     mkexpr(tmp[2])),
+                                               binop(Iop_16HLto32,
+                                                     mkexpr(tmp[1]),
+                                                     mkexpr(tmp[0])))));
+                           break;
+                        }
+
+                     case 0x30: { /* INSVE.W */
+                           DIP("INSVE.W w%d[%d], r%d[0]", wd, n, ws);
+                           t1 = newTemp(Ity_V128);
+                           t2 = newTemp(Ity_V128);
+                           assign(t1, getWReg(wd));
+                           assign(t2, getWReg(ws));
+                           Int i;
+                           IRTemp tmp[4];
+
+                           for (i = 0; i < 4; i++) {
+                              tmp[i] = newTemp(Ity_I32);
+
+                              if (n == i)
+                                 assign(tmp[i],
+                                        binop(Iop_GetElem32x4,
+                                              mkexpr(t2), mkU8(0x0)));
+                              else
+                                 assign(tmp[i],
+                                        binop(Iop_GetElem32x4,
+                                              mkexpr(t1), mkU8(i)));
+                           }
+
+                           putWReg(wd,
+                                   binop(Iop_64HLtoV128,
+                                         binop(Iop_32HLto64,
+                                               mkexpr(tmp[3]),
+                                               mkexpr(tmp[2])),
+                                         binop(Iop_32HLto64,
+                                               mkexpr(tmp[1]),
+                                               mkexpr(tmp[0]))));
+                           break;
+                        }
+
+                     case 0x38: { /* INSVE.D */
+                           DIP("INSVE.D w%d[%d], r%d[0]", wd, n, ws);
+                           t1 = newTemp(Ity_V128);
+                           t2 = newTemp(Ity_V128);
+                           assign(t1, getWReg(wd));
+                           assign(t2, getWReg(ws));
+                           Int i;
+                           IRTemp tmp[2];
+
+                           for (i = 0; i < 2; i++) {
+                              tmp[i] = newTemp(Ity_I64);
+
+                              if (n == i)
+                                 assign(tmp[i],
+                                        binop(Iop_GetElem64x2,
+                                              mkexpr(t2), mkU8(0x0)));
+                              else
+                                 assign(tmp[i],
+                                        binop(Iop_GetElem64x2,
+                                              mkexpr(t1), mkU8(i)));
+                           }
+
+                           putWReg(wd,
+                                   binop(Iop_64HLtoV128,
+                                         mkexpr(tmp[1]), mkexpr(tmp[0])));
+                           break;
+                        }
+                  }
+
+                  break;
+               }
+
+            default:
+               return -1;
+      }
+   }
+   return 0;
+}
+
+/* Translate an MSA VEC-format instruction (whole-vector bitwise logic)
+   into IR.
+
+   cins  raw instruction word; bits 25..21 select the operation and
+         bits 20..16 hold the second source register (wt).  Bits 25..24
+         are asserted to be zero.
+   wd    destination vector register number (also read by BMNZ.V,
+         BMZ.V and BSEL.V)
+   ws    first source vector register number
+
+   Returns 0 when the instruction was translated, -1 when the operation
+   field does not name a known VEC instruction. */
+static Int msa_VEC(UInt cins, UChar wd, UChar ws) { /* VEC */
+   IRTemp first, second, result;
+   UShort operation;
+   UChar wt;
+
+   vassert((cins & 0x03000000) == 0);
+
+   operation = (cins >> 21) & 0x1F;
+   wt = (cins >> 16) & 0x1F;
+
+   switch (operation) {
+      case 0x00: /* AND.V: wd = ws AND wt */
+         DIP("AND.V w%d, w%d, w%d", wd, ws, wt);
+         first = newTemp(Ity_V128);
+         second = newTemp(Ity_V128);
+         result = newTemp(Ity_V128);
+         assign(first, getWReg(ws));
+         assign(second, getWReg(wt));
+         assign(result,
+                binop(Iop_AndV128, mkexpr(first), mkexpr(second)));
+         putWReg(wd, mkexpr(result));
+         return 0;
+
+      case 0x01: /* OR.V: wd = ws OR wt */
+         DIP("OR.V w%d, w%d, w%d", wd, ws, wt);
+         first = newTemp(Ity_V128);
+         second = newTemp(Ity_V128);
+         result = newTemp(Ity_V128);
+         assign(first, getWReg(ws));
+         assign(second, getWReg(wt));
+         assign(result,
+                binop(Iop_OrV128, mkexpr(first), mkexpr(second)));
+         putWReg(wd, mkexpr(result));
+         return 0;
+
+      case 0x02: /* NOR.V: wd = NOT (ws OR wt) */
+         DIP("NOR.V w%d, w%d, w%d", wd, ws, wt);
+         first = newTemp(Ity_V128);
+         second = newTemp(Ity_V128);
+         result = newTemp(Ity_V128);
+         assign(first, getWReg(ws));
+         assign(second, getWReg(wt));
+         assign(result,
+                unop(Iop_NotV128,
+                     binop(Iop_OrV128, mkexpr(first), mkexpr(second))));
+         putWReg(wd, mkexpr(result));
+         return 0;
+
+      case 0x03: /* XOR.V: wd = ws XOR wt */
+         DIP("XOR.V w%d, w%d, w%d", wd, ws, wt);
+         first = newTemp(Ity_V128);
+         second = newTemp(Ity_V128);
+         result = newTemp(Ity_V128);
+         assign(first, getWReg(ws));
+         assign(second, getWReg(wt));
+         assign(result,
+                binop(Iop_XorV128, mkexpr(first), mkexpr(second)));
+         putWReg(wd, mkexpr(result));
+         return 0;
+
+      case 0x04: /* BMNZ.V: wd = (ws AND wt) OR (wd AND NOT wt) */
+         DIP("BMNZ.V w%d, w%d, w%d", wd, ws, wt);
+         first = newTemp(Ity_V128);
+         second = newTemp(Ity_V128);
+         result = newTemp(Ity_V128);
+         /* Bits copied from ws where the wt mask is set ... */
+         assign(first, binop(Iop_AndV128, getWReg(ws), getWReg(wt)));
+         /* ... and bits of wd preserved where the mask is clear. */
+         assign(second,
+                binop(Iop_AndV128,
+                      getWReg(wd), unop(Iop_NotV128, getWReg(wt))));
+         assign(result,
+                binop(Iop_OrV128, mkexpr(first), mkexpr(second)));
+         putWReg(wd, mkexpr(result));
+         return 0;
+
+      case 0x05: /* BMZ.V: wd = (ws AND NOT wt) OR (wd AND wt) */
+         DIP("BMZ.V w%d, w%d, w%d", wd, ws, wt);
+         first = newTemp(Ity_V128);
+         second = newTemp(Ity_V128);
+         result = newTemp(Ity_V128);
+         /* Bits of wd preserved where the wt mask is set ... */
+         assign(first, binop(Iop_AndV128, getWReg(wd), getWReg(wt)));
+         /* ... and bits copied from ws where the mask is clear. */
+         assign(second,
+                binop(Iop_AndV128,
+                      getWReg(ws), unop(Iop_NotV128, getWReg(wt))));
+         assign(result,
+                binop(Iop_OrV128, mkexpr(first), mkexpr(second)));
+         putWReg(wd, mkexpr(result));
+         return 0;
+
+      case 0x06: /* BSEL.V: wd = (ws AND NOT wd) OR (wt AND wd) */
+         DIP("BSEL.V w%d, w%d, w%d", wd, ws, wt);
+         first = newTemp(Ity_V128);
+         second = newTemp(Ity_V128);
+         result = newTemp(Ity_V128);
+         /* Here the old value of wd acts as the selection mask. */
+         assign(first, binop(Iop_AndV128, getWReg(wd), getWReg(wt)));
+         assign(second,
+                binop(Iop_AndV128,
+                      getWReg(ws), unop(Iop_NotV128, getWReg(wd))));
+         assign(result,
+                binop(Iop_OrV128, mkexpr(first), mkexpr(second)));
+         putWReg(wd, mkexpr(result));
+         return 0;
+
+      default:
+         break;
+   }
+
+   /* Operation field does not match any VEC instruction handled above. */
+   return -1;
+}
+
+/* Translate an MSA 2R-format instruction (FILL, PCNT, NLOC, NLZC)
+   into IR.
+
+   cins  raw instruction word; bits 25..18 select the operation and
+         bits 17..16 the data format (0 = B, 1 = H, 2 = W, 3 = D).
+         Bit 21 is asserted to be zero.
+   wd    destination vector register number
+   ws    source register number: a general purpose register for FILL,
+         a vector register for PCNT, NLOC and NLZC
+
+   Returns 0 when the instruction was translated, -1 when the operation
+   is not recognised and -2 for FILL.D when not in 64-bit mode. */
+static Int msa_2R(UInt cins, UChar wd, UChar ws) { /* 2R */
+   IRTemp t1, t2, t3, t4;
+   IRType ty;
+   UShort operation;
+   UChar df;
+
+   vassert((cins & 0x00200000) == 0);
+
+   operation = (cins & 0x03FC0000) >> 18;
+   df = (cins & 0x00030000) >> 16;
+   /* Width of the integer register read by FILL. */
+   ty = mode64 ? Ity_I64 : Ity_I32;
+
+   switch (operation) {
+      case 0xC0: { /* FILL.df */
+            /* t1 receives the element replicated across 64 bits; it is
+               then duplicated into both halves of wd below. */
+            t1 = newTemp(Ity_I64);
+
+            switch (df) {
+               case 0x00: /* FILL.B */
+                  DIP("FILL.B w%d, r%d", wd, ws);
+                  t2 = newTemp(Ity_I32);
+                  t3 = newTemp(Ity_I16);
+                  t4 = newTemp(Ity_I8);
+                  assign(t4, mkNarrowTo8(ty, getIReg(ws)));
+                  assign(t3,
+                         binop(Iop_8HLto16, mkexpr(t4), mkexpr(t4)));
+                  assign(t2,
+                         binop(Iop_16HLto32, mkexpr(t3), mkexpr(t3)));
+                  assign(t1,
+                         binop(Iop_32HLto64, mkexpr(t2), mkexpr(t2)));
+                  break;
+
+               case 0x01: /* FILL.H */
+                  DIP("FILL.H w%d, r%d", wd, ws);
+                  t2 = newTemp(Ity_I32);
+                  t3 = newTemp(Ity_I16);
+                  assign(t3, mkNarrowTo16(ty, getIReg(ws)));
+                  assign(t2,
+                         binop(Iop_16HLto32, mkexpr(t3), mkexpr(t3)));
+                  assign(t1,
+                         binop(Iop_32HLto64, mkexpr(t2), mkexpr(t2)));
+                  break;
+
+               case 0x02: /* FILL.W */
+                  DIP("FILL.W w%d, r%d", wd, ws);
+                  t2 = newTemp(Ity_I32);
+                  assign(t2, mkNarrowTo32(ty, getIReg(ws)));
+                  assign(t1,
+                         binop(Iop_32HLto64, mkexpr(t2), mkexpr(t2)));
+                  break;
+
+               case 0x03: /* FILL.D */
+                  /* Only translated in 64-bit mode, where the integer
+                     register is read directly as the 64-bit element. */
+                  if (mode64) {
+                     DIP("FILL.D w%d, r%d", wd, ws);
+                     assign(t1, getIReg(ws));
+                  } else {
+                     return -2;
+                  }
+
+                  break;
+
+               default:
+                  return -1;
+            }
+
+            putWReg(wd,
+                    binop(Iop_64HLtoV128, mkexpr(t1), mkexpr(t1)));
+            break;
+         }
+
+      case 0xC1: { /* PCNT.df */
+            /* All formats start from the Iop_Cnt8x16 result.  For wider
+               elements, neighbouring lanes are summed pairwise: mask
+               the even lanes, shift the odd lanes down and mask, then
+               add.  Each round doubles the lane width. */
+            switch (df) {
+               case 0x00: /* PCNT.B */
+                  DIP("PCNT.B w%d, w%d", wd, ws);
+                  putWReg(wd,
+                          unop(Iop_Cnt8x16, getWReg(ws)));
+                  break;
+
+               case 0x01: /* PCNT.H */
+                  DIP("PCNT.H w%d, w%d", wd, ws);
+                  t1 = newTemp(Ity_V128);
+                  t2 = newTemp(Ity_V128);
+                  assign(t1, unop(Iop_Cnt8x16, getWReg(ws)));
+                  /* 8-bit counts -> 16-bit counts. */
+                  assign(t2,
+                      binop(Iop_Add16x8,
+                         binop(Iop_AndV128,
+                               mkexpr(t1),
+                               binop(Iop_64HLtoV128,
+                                     mkU64(0x00FF00FF00FF00FFULL),
+                                     mkU64(0x00FF00FF00FF00FFULL))),
+                         binop(Iop_AndV128,
+                               binop(Iop_ShrN16x8,
+                                     mkexpr(t1), mkU8(8)),
+                               binop(Iop_64HLtoV128,
+                                     mkU64(0x00FF00FF00FF00FFULL),
+                                     mkU64(0x00FF00FF00FF00FFULL)))));
+                  putWReg(wd, mkexpr(t2));
+                  break;
+
+               case 0x02: /* PCNT.W */
+                  DIP("PCNT.W w%d, w%d", wd, ws);
+                  t1 = newTemp(Ity_V128);
+                  t2 = newTemp(Ity_V128);
+                  t3 = newTemp(Ity_V128);
+                  assign(t1, unop(Iop_Cnt8x16, getWReg(ws)));
+                  /* 8-bit counts -> 16-bit counts. */
+                  assign(t2,
+                      binop(Iop_Add32x4,
+                         binop(Iop_AndV128,
+                               mkexpr(t1),
+                               binop(Iop_64HLtoV128,
+                                     mkU64(0x00FF00FF00FF00FFULL),
+                                     mkU64(0x00FF00FF00FF00FFULL))),
+                         binop(Iop_AndV128,
+                               binop(Iop_ShrN32x4,
+                                     mkexpr(t1), mkU8(8)),
+                               binop(Iop_64HLtoV128,
+                                     mkU64(0x00FF00FF00FF00FFULL),
+                                     mkU64(0x00FF00FF00FF00FFULL)))));
+                  /* 16-bit counts -> 32-bit counts. */
+                  assign(t3,
+                      binop(Iop_Add32x4,
+                         binop(Iop_AndV128,
+                               mkexpr(t2),
+                               binop(Iop_64HLtoV128,
+                                     mkU64(0x0000FFFF0000FFFFULL),
+                                     mkU64(0x0000FFFF0000FFFFULL))),
+                         binop(Iop_AndV128,
+                               binop(Iop_ShrN32x4,
+                                     mkexpr(t2), mkU8(16)),
+                               binop(Iop_64HLtoV128,
+                                     mkU64(0x0000FFFF0000FFFFULL),
+                                     mkU64(0x0000FFFF0000FFFFULL)))));
+                  putWReg(wd, mkexpr(t3));
+                  break;
+
+               case 0x03: /* PCNT.D */
+                  DIP("PCNT.D w%d, w%d", wd, ws);
+                  t1 = newTemp(Ity_V128);
+                  t2 = newTemp(Ity_V128);
+                  t3 = newTemp(Ity_V128);
+                  t4 = newTemp(Ity_V128);
+                  assign(t1, unop(Iop_Cnt8x16, getWReg(ws)));
+                  /* 8-bit counts -> 16-bit counts. */
+                  assign(t2,
+                      binop(Iop_Add64x2,
+                         binop(Iop_AndV128,
+                               mkexpr(t1),
+                               binop(Iop_64HLtoV128,
+                                     mkU64(0x00FF00FF00FF00FFULL),
+                                     mkU64(0x00FF00FF00FF00FFULL))),
+                         binop(Iop_AndV128,
+                               binop(Iop_ShrN64x2,
+                                     mkexpr(t1), mkU8(8)),
+                               binop(Iop_64HLtoV128,
+                                     mkU64(0x00FF00FF00FF00FFULL),
+                                     mkU64(0x00FF00FF00FF00FFULL)))));
+                  /* 16-bit counts -> 32-bit counts. */
+                  assign(t3,
+                      binop(Iop_Add64x2,
+                         binop(Iop_AndV128,
+                               mkexpr(t2),
+                               binop(Iop_64HLtoV128,
+                                     mkU64(0x0000FFFF0000FFFFULL),
+                                     mkU64(0x0000FFFF0000FFFFULL))),
+                         binop(Iop_AndV128,
+                               binop(Iop_ShrN64x2,
+                                     mkexpr(t2), mkU8(16)),
+                               binop(Iop_64HLtoV128,
+                                     mkU64(0x0000FFFF0000FFFFULL),
+                                     mkU64(0x0000FFFF0000FFFFULL)))));
+                  /* 32-bit counts -> 64-bit counts. */
+                  assign(t4,
+                      binop(Iop_Add64x2,
+                         binop(Iop_AndV128,
+                               mkexpr(t3),
+                               binop(Iop_64HLtoV128,
+                                     mkU64(0x00000000FFFFFFFFULL),
+                                     mkU64(0x00000000FFFFFFFFULL))),
+                         binop(Iop_AndV128,
+                               binop(Iop_ShrN64x2,
+                                     mkexpr(t3), mkU8(32)),
+                               binop(Iop_64HLtoV128,
+                                     mkU64(0x00000000FFFFFFFFULL),
+                                     mkU64(0x00000000FFFFFFFFULL)))));
+                  putWReg(wd, mkexpr(t4));
+                  break;
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0xC2: { /* NLOC.df */
+            /* NOTE(review): B/H/W map NLOC straight onto Iop_Cls*;
+               confirm that this matches how the MIPS backend lowers
+               those IROps. */
+            switch (df) {
+               case 0x00: /* NLOC.B */
+                  DIP("NLOC.B w%d, w%d", wd, ws);
+                  putWReg(wd,
+                          unop(Iop_Cls8x16, getWReg(ws)));
+                  break;
+
+               case 0x01: /* NLOC.H */
+                  DIP("NLOC.H w%d, w%d", wd, ws);
+                  putWReg(wd,
+                          unop(Iop_Cls16x8, getWReg(ws)));
+                  break;
+
+               case 0x02: /* NLOC.W */
+                  DIP("NLOC.W w%d, w%d", wd, ws);
+                  putWReg(wd,
+                          unop(Iop_Cls32x4, getWReg(ws)));
+                  break;
+
+               case 0x03: /* NLOC.D */
+                  DIP("NLOC.D w%d, w%d", wd, ws);
+                  /* Built as the leading-zero count of the complement. */
+                  t1 = newTemp(Ity_V128);
+                  assign(t1, unop(Iop_NotV128, getWReg(ws)));
+                  putWReg(wd, unop(Iop_Clz64x2, mkexpr(t1)));
+                  break;
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0xC3: { /* NLZC.df */
+            switch (df) {
+               case 0x00: /* NLZC.B */
+                  DIP("NLZC.B w%d, w%d", wd, ws);
+                  putWReg(wd,
+                          unop(Iop_Clz8x16, getWReg(ws)));
+                  break;
+
+               case 0x01: /* NLZC.H */
+                  DIP("NLZC.H w%d, w%d", wd, ws);
+                  putWReg(wd,
+                          unop(Iop_Clz16x8, getWReg(ws)));
+                  break;
+
+               case 0x02: /* NLZC.W */
+                  DIP("NLZC.W w%d, w%d", wd, ws);
+                  putWReg(wd,
+                          unop(Iop_Clz32x4, getWReg(ws)));
+                  break;
+
+               case 0x03: /* NLZC.D */
+                  DIP("NLZC.D w%d, w%d", wd, ws);
+                  putWReg(wd,
+                          unop(Iop_Clz64x2, getWReg(ws)));
+                  break;
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      default:
+         return -1;
+   }
+
+   return 0;
+}
+
+static Int msa_2RF(UInt cins, UChar wd, UChar ws) { /* 2RF */
+   IRTemp t1, t2, t3, t4, t5;
+   UShort operation;
+   UChar df, wt;
+
+   operation = (cins & 0x03FE0000) >> 17;
+   df = (cins & 0x00010000) >> 16;
+   wt = (cins & 0x001F0000) >> 16;
+
+   switch (operation) {
+
+      case 0x190: { /* FCLASS.df */
+            IRTemp t0 = newTemp(Ity_V128);
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            t3 = newTemp(Ity_V128);
+            t4 = newTemp(Ity_V128);
+            t5 = newTemp(Ity_V128);
+
+            switch (df) {
+               case 0x00: { /* FCLASS.W */
+                     DIP("FCLASS.W w%d, w%d", wd, ws);
+                     assign(t0,
+                         binop(Iop_CmpEQ32x4,
+                            binop(Iop_AndV128,
+                                  getWReg(ws),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x7F8000007F800000ull),
+                                        mkU64(0x7F8000007F800000ull))),
+                            binop(Iop_64HLtoV128,
+                                  mkU64(0ull), mkU64(0ull))));
+                     assign(t1,
+                         binop(Iop_CmpEQ32x4,
+                            binop(Iop_AndV128,
+                                  getWReg(ws),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x7F8000007F800000ull),
+                                        mkU64(0x7F8000007F800000ull))),
+                            binop(Iop_64HLtoV128,
+                                  mkU64(0x7F8000007F800000ull),
+                                  mkU64(0x7F8000007F800000ull))));
+                     assign(t2,
+                            binop(Iop_SarN32x4,
+                                  getWReg(ws), mkU8(31)));
+                     assign(t3,
+                         binop(Iop_CmpEQ32x4,
+                            binop(Iop_AndV128,
+                                  getWReg(ws),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x0040000000400000ull),
+                                        mkU64(0x0040000000400000ull))),
+                            binop(Iop_64HLtoV128,
+                                  mkU64(0x0040000000400000ull),
+                                  mkU64(0x0040000000400000ull))));
+                     assign(t4,
+                         binop(Iop_CmpEQ32x4,
+                            binop(Iop_AndV128,
+                                  getWReg(ws),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x007FFFFF007FFFFFULL),
+                                        mkU64(0x007FFFFF007FFFFFULL))),
+                            binop(Iop_64HLtoV128,
+                                  mkU64(0ull), mkU64(0ull))));
+                     assign(t5,
+                         binop(Iop_Shl32x4,
+                            binop(Iop_OrV128,
+                               binop(Iop_AndV128,
+                                  mkexpr(t1),
+                                  binop(Iop_AndV128,
+                                        mkexpr(t4),
+                                        binop(Iop_64HLtoV128,
+                                           mkU64(0x100000001ull),
+                                           mkU64(0x100000001ull)))),
+                               binop(Iop_OrV128,
+                                  binop(Iop_AndV128,
+                                     mkexpr(t0),
+                                     binop(Iop_OrV128,
+                                        binop(Iop_AndV128,
+                                           mkexpr(t4),
+                                           binop(Iop_64HLtoV128,
+                                              mkU64(0x800000008ull),
+                                              mkU64(0x800000008ull))),
+                                        binop(Iop_AndV128,
+                                           unop(Iop_NotV128,
+                                              mkexpr(t4)),
+                                           binop(Iop_64HLtoV128,
+                                              mkU64(0x400000004ull),
+                                              mkU64(0x400000004ull))))),
+                                  binop(Iop_AndV128,
+                                     unop(Iop_NotV128,
+                                          mkexpr(t1)),
+                                     binop(Iop_AndV128,
+                                        unop(Iop_NotV128,
+                                           mkexpr(t0)),
+                                        binop(Iop_64HLtoV128,
+                                           mkU64(0x200000002ull),
+                                           mkU64(0x200000002ull)))))),
+                         binop(Iop_OrV128,
+                               binop(Iop_AndV128,
+                                     mkexpr(t2),
+                                     binop(Iop_64HLtoV128,
+                                           mkU64(0x200000002ull),
+                                           mkU64(0x200000002ull))),
+                               binop(Iop_AndV128,
+                                     unop(Iop_NotV128,
+                                          mkexpr(t2)),
+                                     binop(Iop_64HLtoV128,
+                                           mkU64(0x600000006ull),
+                                           mkU64(0x600000006ull))))));
+                     putWReg(wd,
+                       binop(Iop_OrV128,
+                          mkexpr(t5),
+                          binop(Iop_AndV128,
+                             binop(Iop_CmpEQ32x4,
+                                mkexpr(t5),
+                                binop(Iop_64HLtoV128,
+                                      mkU64(0ull),
+                                      mkU64(0ull))),
+                             binop(Iop_OrV128,
+                                binop(Iop_AndV128,
+                                      mkexpr(t3),
+                                      binop(Iop_64HLtoV128,
+                                         mkU64(0x100000001ull),
+                                         mkU64(0x100000001ull))),
+                                binop(Iop_AndV128,
+                                      unop(Iop_NotV128, mkexpr(t3)),
+                                      binop(Iop_64HLtoV128,
+                                         mkU64(0x200000002ull),
+                                         mkU64(0x200000002ull)))))));
+                     break;
+                  }
+
+               case 0x01: { /* FCLASS.D */
+                     DIP("FCLASS.D w%d, w%d", wd, ws);
+                     assign(t0,
+                         binop(Iop_CmpEQ64x2,
+                            binop(Iop_AndV128,
+                                  getWReg(ws),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x7FF0000000000000ull),
+                                        mkU64(0x7FF0000000000000ull))),
+                            binop(Iop_64HLtoV128,
+                                  mkU64(0ull), mkU64(0ull))));
+                     assign(t1,
+                         binop(Iop_CmpEQ64x2,
+                            binop(Iop_AndV128,
+                                  getWReg(ws),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x7FF0000000000000ull),
+                                        mkU64(0x7FF0000000000000ull))),
+                            binop(Iop_64HLtoV128,
+                                  mkU64(0x7FF0000000000000ull),
+                                  mkU64(0x7FF0000000000000ull))));
+                     assign(t2,
+                            binop(Iop_SarN64x2,
+                                  getWReg(ws), mkU8(63)));
+                     assign(t3,
+                         binop(Iop_CmpEQ64x2,
+                            binop(Iop_AndV128,
+                                  getWReg(ws),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x0008000000000000ull),
+                                        mkU64(0x0008000000000000ull))),
+                            binop(Iop_64HLtoV128,
+                                  mkU64(0x0008000000000000ull),
+                                  mkU64(0x0008000000000000ull))));
+                     assign(t4,
+                         binop(Iop_CmpEQ64x2,
+                            binop(Iop_AndV128,
+                                  getWReg(ws),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x000FFFFFFFFFFFFFULL),
+                                        mkU64(0x000FFFFFFFFFFFFFULL))),
+                            binop(Iop_64HLtoV128,
+                                  mkU64(0ull), mkU64(0ull))));
+                     assign(t5,
+                         binop(Iop_Shl64x2,
+                            binop(Iop_OrV128,
+                               binop(Iop_AndV128,
+                                  mkexpr(t1),
+                                  binop(Iop_AndV128,
+                                     mkexpr(t4),
+                                     binop(Iop_64HLtoV128,
+                                           mkU64(1ull),
+                                           mkU64(1ull)))),
+                               binop(Iop_OrV128,
+                                  binop(Iop_AndV128,
+                                     mkexpr(t0),
+                                     binop(Iop_OrV128,
+                                           binop(Iop_AndV128,
+                                                 mkexpr(t4),
+                                                 binop(Iop_64HLtoV128,
+                                                       mkU64(8ull),
+                                                       mkU64(8ull))),
+                                           binop(Iop_AndV128,
+                                                 unop(Iop_NotV128,
+                                                       mkexpr(t4)),
+                                                 binop(Iop_64HLtoV128,
+                                                       mkU64(4ull),
+                                                       mkU64(4ull))))),
+                                  binop(Iop_AndV128,
+                                     unop(Iop_NotV128,
+                                          mkexpr(t1)),
+                                     binop(Iop_AndV128,
+                                           unop(Iop_NotV128,
+                                                 mkexpr(t0)),
+                                           binop(Iop_64HLtoV128,
+                                                 mkU64(2ull),
+                                                 mkU64(2ull)))))),
+                            binop(Iop_OrV128,
+                               binop(Iop_AndV128,
+                                  mkexpr(t2),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(2ull),
+                                        mkU64(2ull))),
+                               binop(Iop_AndV128,
+                                  unop(Iop_NotV128,
+                                       mkexpr(t2)),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(6ull),
+                                        mkU64(6ull))))));
+                     putWReg(wd,
+                          binop(Iop_OrV128,
+                             mkexpr(t5),
+                             binop(Iop_AndV128,
+                                binop(Iop_CmpEQ64x2,
+                                      mkexpr(t5),
+                                      binop(Iop_64HLtoV128,
+                                            mkU64(0ull),
+                                            mkU64(0ull))),
+                                binop(Iop_OrV128,
+                                      binop(Iop_AndV128,
+                                            mkexpr(t3),
+                                            binop(Iop_64HLtoV128,
+                                                  mkU64(1ull),
+                                                  mkU64(1ull))),
+                                      binop(Iop_AndV128,
+                                            unop(Iop_NotV128,
+                                                 mkexpr(t3)),
+                                            binop(Iop_64HLtoV128,
+                                                  mkU64(2ull),
+                                                  mkU64(2ull)))))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x191: { /* FTRUNC_S.df */
+            switch (df) {
+               case 0x00: { /* FTRUNC_S.W */
+                     DIP("FTRUNC_S.W w%d, w%d", wd, ws);
+                     calculateMSACSR(ws, wd, FTRUNCSW, 1);
+                     putWReg(wd, unop(Iop_FtoI32Sx4_RZ, getWReg(ws)));
+                     break;
+                  }
+
+               case 0x01: { /* FTRUNC_S.D */
+                     DIP("FTRUNC_S.D w%d, w%d", wd, ws);
+                     calculateMSACSR(ws, wd, FTRUNCSD, 1);
+                     t1 = newTemp(Ity_I64);
+                     t2 = newTemp(Ity_I64);
+                     t3 = newTemp(Ity_V128);
+                     assign(t3,
+                         binop(Iop_AndV128,
+                               unop(Iop_NotV128,
+                                    binop(Iop_CmpUN64Fx2,
+                                          getWReg(ws),
+                                          getWReg(ws))),
+                               binop(Iop_Max64Fx2,
+                                     getWReg(ws),
+                                     binop(Iop_64HLtoV128,
+                                        mkU64(0xC3E0000000000000),
+                                        mkU64(0xC3E0000000000000)))));
+                     assign(t1,
+                            unop(Iop_ReinterpF64asI64,
+                                 binop(Iop_RoundF64toInt,
+                                       mkU32(0x3),
+                                       unop(Iop_ReinterpI64asF64,
+                                            unop(Iop_V128to64,
+                                                 mkexpr(t3))))));
+                     assign(t2,
+                            unop(Iop_ReinterpF64asI64,
+                                 binop(Iop_RoundF64toInt,
+                                       mkU32(0x3),
+                                       unop(Iop_ReinterpI64asF64,
+                                            unop(Iop_V128HIto64,
+                                                 mkexpr(t3))))));
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   mkexpr(t2), mkexpr(t1)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x192: { /* FTRUNC_U.df */
+            switch (df) {
+               case 0x00: {  /* FTRUNC_U.W */
+                     DIP("FTRUNC_U.W w%d, w%d", wd, ws);
+                     calculateMSACSR(ws, wd, FTRUNCUW, 1);
+                     putWReg(wd, unop(Iop_FtoI32Ux4_RZ, getWReg(ws)));
+                     break;
+                  }
+
+               case 0x01: { /* FTRUNC_U.D */
+                     DIP("FTRUNC_U.D w%d, w%d", wd, ws);
+                     calculateMSACSR(ws, wd, FTRUNCUD, 1);
+                     t1 = newTemp(Ity_I64);
+                     t2 = newTemp(Ity_I64);
+                     assign(t1,
+                            binop(Iop_F64toI64U,
+                                  mkU32(0x3),
+                                  unop(Iop_ReinterpI64asF64,
+                                       unop(Iop_V128to64,
+                                            getWReg(ws)))));
+                     assign(t2,
+                            binop(Iop_F64toI64U,
+                                  mkU32(0x3),
+                                  unop(Iop_ReinterpI64asF64,
+                                       unop(Iop_V128HIto64,
+                                            getWReg(ws)))));
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   mkexpr(t2), mkexpr(t1)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x193: { /* FSQRT.df */
+            /* Lane-wise square root of ws written to wd, rounded with the
+             * mode returned by get_IR_roundingmode_MSA().  The rounding
+             * mode is fetched before calculateMSACSR() is invoked.  Any df
+             * other than 0 (.W) or 1 (.D) makes the decoder return -1. */
+            if (df == 0x00) { /* FSQRT.W */
+               DIP("FSQRT.W w%d, w%d", wd, ws);
+               IRExpr *rmode = get_IR_roundingmode_MSA();
+               calculateMSACSR(ws, wd, FSQRTW, 1);
+               putWReg(wd, binop(Iop_Sqrt32Fx4, rmode, getWReg(ws)));
+            } else if (df == 0x01) { /* FSQRT.D */
+               DIP("FSQRT.D w%d, w%d", wd, ws);
+               IRExpr *rmode = get_IR_roundingmode_MSA();
+               calculateMSACSR(ws, wd, FSQRTD, 1);
+               putWReg(wd, binop(Iop_Sqrt64Fx2, rmode, getWReg(ws)));
+            } else {
+               return -1;
+            }
+
+            break;
+         }
+
+      case 0x194: { /* FRSQRT.df */
+            /* Lane-wise reciprocal square-root estimate of ws stored in
+             * wd (Iop_RSqrtEst32Fx4 / Iop_RSqrtEst64Fx2).  Any df other
+             * than 0 (.W) or 1 (.D) makes the decoder return -1. */
+            if (df == 0x00) { /* FRSQRT.W */
+               DIP("FRSQRT.W w%d, w%d", wd, ws);
+               calculateMSACSR(ws, wd, FRSQRTW, 1);
+               putWReg(wd, unop(Iop_RSqrtEst32Fx4, getWReg(ws)));
+            } else if (df == 0x01) { /* FRSQRT.D */
+               DIP("FRSQRT.D w%d, w%d", wd, ws);
+               calculateMSACSR(ws, wd, FRSQRTD, 1);
+               putWReg(wd, unop(Iop_RSqrtEst64Fx2, getWReg(ws)));
+            } else {
+               return -1;
+            }
+
+            break;
+         }
+
+      case 0x195: { /* FRCP.df */
+            /* Lane-wise reciprocal estimate of ws stored in wd
+             * (Iop_RecipEst32Fx4 / Iop_RecipEst64Fx2).  Any df other than
+             * 0 (.W) or 1 (.D) makes the decoder return -1. */
+            if (df == 0x00) { /* FRCP.W */
+               DIP("FRCP.W w%d, w%d", wd, ws);
+               calculateMSACSR(ws, wd, FRCPW, 1);
+               putWReg(wd, unop(Iop_RecipEst32Fx4, getWReg(ws)));
+            } else if (df == 0x01) { /* FRCP.D */
+               DIP("FRCP.D w%d, w%d", wd, ws);
+               calculateMSACSR(ws, wd, FRCPD, 1);
+               putWReg(wd, unop(Iop_RecipEst64Fx2, getWReg(ws)));
+            } else {
+               return -1;
+            }
+
+            break;
+         }
+
+      case 0x196: { /* FRINT.df: round each floating-point lane of ws and
+                     * write the result to wd.  Every result lane is a
+                     * bitwise blend, selected by the lane masks t2, t3 and
+                     * t4 built below, of: the unchanged input lane, a fixed
+                     * NaN bit pattern, or the rounded value tmp[i] combined
+                     * with the input's sign bit.  Any df other than 0 (.W)
+                     * or 1 (.D) returns -1. */
+            t1 = newTemp(Ity_V128);
+            t2 = newTemp(Ity_V128);
+            t3 = newTemp(Ity_V128);
+            t4 = newTemp(Ity_V128);
+            IRExpr *rm = get_IR_roundingmode_MSA();
+            assign(t1, getWReg(ws)); /* t1: the source vector */
+
+            switch (df) {
+               case 0x00: { /* FRINT.W */
+                     DIP("FRINT.W w%d, w%d", wd, ws);
+                     calculateMSACSR(ws, wt, FRINTW, 1);
+                     assign(t2, /* t2: lanes < -2^31 or > 2^31 */
+                         binop(Iop_OrV128,
+                            binop(Iop_CmpLT32Fx4,
+                               mkexpr(t1),
+                               binop(Iop_64HLtoV128,
+                                     mkU64(0xCF000000CF000000ull),
+                                     mkU64(0xCF000000CF000000ull))),
+                            binop(Iop_CmpLT32Fx4,
+                               binop(Iop_64HLtoV128,
+                                     mkU64(0x4F0000004F000000ull),
+                                     mkU64(0x4F0000004F000000ull)),
+                               mkexpr(t1))));
+                     assign(t3, /* t3: lanes with bit 22 (0x00400000) set */
+                         binop(Iop_CmpEQ32x4,
+                            binop(Iop_AndV128,
+                               mkexpr(t1),
+                               binop(Iop_64HLtoV128,
+                                     mkU64(0x0040000000400000ull),
+                                     mkU64(0x0040000000400000ull))),
+                            binop(Iop_64HLtoV128,
+                               mkU64(0x0040000000400000ull),
+                               mkU64(0x0040000000400000ull))));
+                     assign(t4, /* t4: unordered self-compare (presumably NaN lanes) */
+                            binop(Iop_CmpUN32Fx4,
+                                  mkexpr(t1), mkexpr(t1)));
+                     IRTemp tmp[4];
+                     Int i;
+                     /* tmp[i]: lane i of t1 passed through Iop_RoundF32toInt
+                      * and then Iop_I32StoF32, both with rounding mode rm.
+                      * NOTE(review): Iop_I32StoF32 consumes the reinterpreted
+                      * bits of the Iop_RoundF32toInt result, so this appears
+                      * to rely on how the backend implements
+                      * Iop_RoundF32toInt -- confirm. */
+                     for (i = 0; i < 4; i++) {
+                        tmp[i] = newTemp(Ity_I32);
+                        assign(tmp[i],
+                            unop(Iop_ReinterpF32asI32,
+                              binop(Iop_I32StoF32, rm,
+                                 unop(Iop_ReinterpF32asI32,
+                                      binop(Iop_RoundF32toInt, rm,
+                                            unop(Iop_ReinterpI32asF32,
+                                                 binop(Iop_GetElem32x4,
+                                                       mkexpr(t1),
+                                                       mkU8(i))))))));
+                     }
+                     /* wd = ((t2 | (t4 & ~t3)) & t1)
+                      *    | ((t4 & t3) & 0x7FBFFFFF per lane)
+                      *    | (~(t2 | t4) & (tmp | (t1 & 0x80000000 per lane))) */
+                     putWReg(wd,
+                          binop(Iop_OrV128,
+                             binop(Iop_OrV128,
+                                binop(Iop_AndV128,
+                                      binop(Iop_OrV128,
+                                            mkexpr(t2),
+                                            binop(Iop_AndV128,
+                                                  mkexpr(t4),
+                                                  unop(Iop_NotV128,
+                                                        mkexpr(t3)))),
+                                      mkexpr(t1)),
+                                binop(Iop_AndV128,
+                                   binop(Iop_AndV128,
+                                         mkexpr(t4),
+                                         mkexpr(t3)),
+                                   binop(Iop_64HLtoV128,
+                                         mkU64(0x7FBFFFFF7FBFFFFF),
+                                         mkU64(0x7FBFFFFF7FBFFFFF)))),
+                             binop(Iop_AndV128,
+                                unop(Iop_NotV128,
+                                     binop(Iop_OrV128,
+                                           mkexpr(t2),
+                                           mkexpr(t4))),
+                                binop(Iop_OrV128,
+                                   binop(Iop_64HLtoV128,
+                                         binop(Iop_32HLto64,
+                                               mkexpr(tmp[3]),
+                                               mkexpr(tmp[2])),
+                                         binop(Iop_32HLto64,
+                                               mkexpr(tmp[1]),
+                                               mkexpr(tmp[0]))),
+                                   binop(Iop_AndV128,
+                                      mkexpr(t1),
+                                      binop(Iop_64HLtoV128,
+                                         mkU64(0x8000000080000000ull),
+                                         mkU64(0x8000000080000000ull)))
+                                ))));
+                     break;
+                  }
+
+               case 0x01: { /* FRINT.D */
+                     DIP("FRINT.D w%d, w%d", wd, ws);
+                     calculateMSACSR(ws, wt, FRINTD, 1);
+                     assign(t2, /* t2: lanes < -2^63 or > 2^63 */
+                         binop(Iop_OrV128,
+                            binop(Iop_CmpLT64Fx2,
+                                  mkexpr(t1),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0xC3E0000000000000ull),
+                                        mkU64(0xC3E0000000000000ull))),
+                            binop(Iop_CmpLT64Fx2,
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x43E0000000000000ull),
+                                        mkU64(0x43E0000000000000ull)),
+                                  mkexpr(t1))));
+                     assign(t3, /* t3: lanes with bit 51 set */
+                         binop(Iop_CmpEQ64x2,
+                            binop(Iop_AndV128,
+                                  getWReg(ws),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x0008000000000000ull),
+                                        mkU64(0x0008000000000000ull))),
+                            binop(Iop_64HLtoV128,
+                                  mkU64(0x0008000000000000ull),
+                                  mkU64(0x0008000000000000ull))));
+                     assign(t4, /* t4: unordered self-compare (presumably NaN lanes) */
+                            binop(Iop_CmpUN64Fx2,
+                                  mkexpr(t1), mkexpr(t1)));
+                     IRTemp tmp[2];
+                     Int i;
+                     /* tmp[i]: lane i of t1 passed through Iop_RoundF64toInt
+                      * and then Iop_I64StoF64, both with rounding mode rm.
+                      * NOTE(review): same backend dependence as in the .W
+                      * case -- confirm. */
+                     for (i = 0; i < 2; i++) {
+                        tmp[i] = newTemp(Ity_I64);
+                        assign(tmp[i],
+                            unop(Iop_ReinterpF64asI64,
+                              binop(Iop_I64StoF64, rm,
+                                 unop(Iop_ReinterpF64asI64,
+                                      binop(Iop_RoundF64toInt, rm,
+                                            unop(Iop_ReinterpI64asF64,
+                                                 binop(Iop_GetElem64x2,
+                                                       mkexpr(t1),
+                                                       mkU8(i))))))));
+                     }
+                     /* wd = ((t2 | (t4 & ~t3)) & t1)
+                      *    | ((t4 & t3) & 0x7FF7FFFFFFFFFFFF per lane)
+                      *    | (~(t2 | t4) & (tmp | (t1 & sign bit per lane))) */
+                     putWReg(wd,
+                          binop(Iop_OrV128,
+                             binop(Iop_OrV128,
+                                binop(Iop_AndV128,
+                                      binop(Iop_OrV128,
+                                            mkexpr(t2),
+                                            binop(Iop_AndV128,
+                                                  mkexpr(t4),
+                                                  unop(Iop_NotV128,
+                                                        mkexpr(t3)))),
+                                      mkexpr(t1)),
+                                binop(Iop_AndV128,
+                                   binop(Iop_AndV128,
+                                         mkexpr(t4),
+                                         mkexpr(t3)),
+                                   binop(Iop_64HLtoV128,
+                                         mkU64(0x7FF7FFFFFFFFFFFF),
+                                         mkU64(0x7FF7FFFFFFFFFFFF)))),
+                             binop(Iop_AndV128,
+                                unop(Iop_NotV128,
+                                     binop(Iop_OrV128,
+                                           mkexpr(t2),
+                                           mkexpr(t4))),
+                                binop(Iop_OrV128,
+                                   binop(Iop_64HLtoV128,
+                                         mkexpr(tmp[1]),
+                                         mkexpr(tmp[0])),
+                                   binop(Iop_AndV128,
+                                         mkexpr(t1),
+                                         binop(Iop_64HLtoV128,
+                                            mkU64(0x8000000000000000ull),
+                                            mkU64(0x8000000000000000ull))
+                                   )))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x197: { /* FLOG2.df */
+            /* Lane-wise Iop_Log2_32Fx4 / Iop_Log2_64Fx2 of ws stored in
+             * wd.  Any df other than 0 (.W) or 1 (.D) makes the decoder
+             * return -1. */
+            if (df == 0x00) { /* FLOG2.W */
+               DIP("FLOG2.W w%d, w%d", wd, ws);
+               calculateMSACSR(ws, wt, FLOG2W, 1);
+               putWReg(wd, unop(Iop_Log2_32Fx4, getWReg(ws)));
+            } else if (df == 0x01) { /* FLOG2.D */
+               DIP("FLOG2.D w%d, w%d", wd, ws);
+               calculateMSACSR(ws, wt, FLOG2D, 1);
+               putWReg(wd, unop(Iop_Log2_64Fx2, getWReg(ws)));
+            } else {
+               return -1;
+            }
+
+            break;
+         }
+
+      case 0x198: { /* FEXUPL.df */
+            /* Widen the floating-point elements held in the upper 64 bits
+             * of ws to the next wider format and store them in wd:
+             * F16 -> F32 for .W, F32 -> F64 for .D.  Any other df makes
+             * the decoder return -1. */
+            if (df == 0x00) { /* FEXUPL.W */
+               DIP("FEXUPL.W w%d, w%d", wd, ws);
+               calculateMSACSR(ws, wt, FEXUPLW, 1);
+               putWReg(wd,
+                       unop(Iop_F16toF32x4,
+                            unop(Iop_V128HIto64, getWReg(ws))));
+            } else if (df == 0x01) { /* FEXUPL.D */
+               DIP("FEXUPL.D w%d, w%d", wd, ws);
+               calculateMSACSR(ws, wt, FEXUPLD, 1);
+               t1 = newTemp(Ity_I64);
+               t2 = newTemp(Ity_I64);
+               /* t1 <- widened low word of the upper half */
+               assign(t1,
+                      unop(Iop_ReinterpF64asI64,
+                           unop(Iop_F32toF64,
+                                unop(Iop_ReinterpI32asF32,
+                                     unop(Iop_64to32,
+                                          unop(Iop_V128HIto64,
+                                               getWReg(ws)))))));
+               /* t2 <- widened high word of the upper half */
+               assign(t2,
+                      unop(Iop_ReinterpF64asI64,
+                           unop(Iop_F32toF64,
+                                unop(Iop_ReinterpI32asF32,
+                                     unop(Iop_64HIto32,
+                                          unop(Iop_V128HIto64,
+                                               getWReg(ws)))))));
+               putWReg(wd,
+                       binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t1)));
+            } else {
+               return -1;
+            }
+
+            break;
+         }
+
+      case 0x199: { /* FEXUPR.df */
+            /* Widen the floating-point elements held in the lower 64 bits
+             * of ws to the next wider format and store them in wd:
+             * F16 -> F32 for .W, F32 -> F64 for .D.  Any other df makes
+             * the decoder return -1. */
+            if (df == 0x00) { /* FEXUPR.W */
+               DIP("FEXUPR.W w%d, w%d", wd, ws);
+               calculateMSACSR(ws, wt, FEXUPRW, 1);
+               putWReg(wd,
+                       unop(Iop_F16toF32x4,
+                            unop(Iop_V128to64, getWReg(ws))));
+            } else if (df == 0x01) { /* FEXUPR.D */
+               DIP("FEXUPR.D w%d, w%d", wd, ws);
+               calculateMSACSR(ws, wt, FEXUPRD, 1);
+               t1 = newTemp(Ity_I64);
+               t2 = newTemp(Ity_I64);
+               /* t1 <- widened low word of the lower half */
+               assign(t1,
+                      unop(Iop_ReinterpF64asI64,
+                           unop(Iop_F32toF64,
+                                unop(Iop_ReinterpI32asF32,
+                                     unop(Iop_64to32,
+                                          unop(Iop_V128to64,
+                                               getWReg(ws)))))));
+               /* t2 <- widened high word of the lower half */
+               assign(t2,
+                      unop(Iop_ReinterpF64asI64,
+                           unop(Iop_F32toF64,
+                                unop(Iop_ReinterpI32asF32,
+                                     unop(Iop_64HIto32,
+                                          unop(Iop_V128to64,
+                                               getWReg(ws)))))));
+               putWReg(wd,
+                       binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t1)));
+            } else {
+               return -1;
+            }
+
+            break;
+         }
+
+      case 0x19A: { /* FFQL.df: fixed-point to floating-point conversion of
+                     * the lanes picked by Iop_InterleaveHI* (presumably the
+                     * upper half of ws).  Each picked lane is sign-extended
+                     * to twice its width, converted to floating point with
+                     * the MSA rounding mode and divided by 2^15 (.W) or
+                     * 2^31 (.D).  Any other df returns -1. */
+            switch (df) {
+               case 0x00: { /* FFQL.W */
+                     DIP("FFQL.W w%d, w%d", wd, ws);
+                     calculateMSACSR(ws, wt, FFQLW, 1);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_I64);
+                     t3 = newTemp(Ity_I64);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     assign(t1, /* t1: 16-bit lanes sign-extended to 32 bits */
+                            binop(Iop_SarN32x4,
+                                  binop(Iop_InterleaveHI16x8,
+                                        getWReg(ws),
+                                        getWReg(ws)),
+                                  mkU8(16)));
+                     assign(t2, /* t2: lanes 1 and 0 of t1 converted to F32 */
+                            binop(Iop_32HLto64,
+                                  unop(Iop_ReinterpF32asI32,
+                                       binop(Iop_I32StoF32, rm,
+                                             binop(Iop_GetElem32x4,
+                                                   mkexpr(t1),
+                                                   mkU8(1)))),
+                                  unop(Iop_ReinterpF32asI32,
+                                       binop(Iop_I32StoF32, rm,
+                                             binop(Iop_GetElem32x4,
+                                                   mkexpr(t1),
+                                                   mkU8(0))))));
+                     assign(t3, /* t3: lanes 3 and 2 of t1 converted to F32 */
+                            binop(Iop_32HLto64,
+                                  unop(Iop_ReinterpF32asI32,
+                                       binop(Iop_I32StoF32, rm,
+                                             binop(Iop_GetElem32x4,
+                                                   mkexpr(t1),
+                                                   mkU8(3)))),
+                                  unop(Iop_ReinterpF32asI32,
+                                       binop(Iop_I32StoF32, rm,
+                                             binop(Iop_GetElem32x4,
+                                                   mkexpr(t1),
+                                                   mkU8(2))))));
+                     putWReg(wd, /* divide by 0x47000000 == 2^15 in every lane */
+                             triop(Iop_Div32Fx4, rm,
+                                   binop(Iop_64HLtoV128,
+                                         mkexpr(t3), mkexpr(t2)),
+                                   binop(Iop_64HLtoV128,
+                                         mkU64(0x4700000047000000),
+                                         mkU64(0x4700000047000000))));
+                     break;
+                  }
+
+               case 0x01: { /* FFQL.D */
+                     DIP("FFQL.D w%d, w%d", wd, ws);
+                     calculateMSACSR(ws, wt, FFQLD, 1);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_I64);
+                     t3 = newTemp(Ity_I64);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     assign(t1, /* t1: 32-bit lanes sign-extended to 64 bits */
+                            binop(Iop_SarN64x2,
+                                  binop(Iop_InterleaveHI32x4,
+                                        getWReg(ws),
+                                        getWReg(ws)),
+                                  mkU8(32)));
+                     assign(t2, /* t2: low lane of t1 converted to F64 */
+                            unop(Iop_ReinterpF64asI64,
+                                 binop(Iop_I64StoF64, rm,
+                                       unop(Iop_V128to64,
+                                            mkexpr(t1)))));
+                     assign(t3, /* t3: high lane of t1 converted to F64 */
+                            unop(Iop_ReinterpF64asI64,
+                                 binop(Iop_I64StoF64, rm,
+                                       unop(Iop_V128HIto64,
+                                            mkexpr(t1)))));
+                     putWReg(wd, /* divide by 0x41E0000000000000 == 2^31 */
+                             triop(Iop_Div64Fx2, rm,
+                                   binop(Iop_64HLtoV128,
+                                         mkexpr(t3), mkexpr(t2)),
+                                   binop(Iop_64HLtoV128,
+                                         mkU64(0x41E0000000000000),
+                                         mkU64(0x41E0000000000000))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x19B: { /* FFQR.df: fixed-point to floating-point conversion of
+                     * the lanes picked by Iop_InterleaveLO* (presumably the
+                     * lower half of ws).  Each picked lane is sign-extended
+                     * to twice its width, converted to floating point with
+                     * the MSA rounding mode and divided by 2^15 (.W) or
+                     * 2^31 (.D).  Any other df returns -1. */
+            switch (df) {
+               case 0x00: { /* FFQR.W */
+                     DIP("FFQR.W w%d, w%d", wd, ws);
+                     calculateMSACSR(ws, wt, FFQRW, 1);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_I64);
+                     t3 = newTemp(Ity_I64);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     assign(t1, /* t1: 16-bit lanes sign-extended to 32 bits */
+                            binop(Iop_SarN32x4,
+                                  binop(Iop_InterleaveLO16x8,
+                                        getWReg(ws),
+                                        getWReg(ws)),
+                                  mkU8(16)));
+                     assign(t2, /* t2: lanes 1 and 0 of t1 converted to F32 */
+                            binop(Iop_32HLto64,
+                                  unop(Iop_ReinterpF32asI32,
+                                       binop(Iop_I32StoF32, rm,
+                                             binop(Iop_GetElem32x4,
+                                                   mkexpr(t1),
+                                                   mkU8(1)))),
+                                  unop(Iop_ReinterpF32asI32,
+                                       binop(Iop_I32StoF32, rm,
+                                             binop(Iop_GetElem32x4,
+                                                   mkexpr(t1),
+                                                   mkU8(0))))));
+                     assign(t3, /* t3: lanes 3 and 2 of t1 converted to F32 */
+                            binop(Iop_32HLto64,
+                                  unop(Iop_ReinterpF32asI32,
+                                       binop(Iop_I32StoF32, rm,
+                                             binop(Iop_GetElem32x4,
+                                                   mkexpr(t1),
+                                                   mkU8(3)))),
+                                  unop(Iop_ReinterpF32asI32,
+                                       binop(Iop_I32StoF32, rm,
+                                             binop(Iop_GetElem32x4,
+                                                   mkexpr(t1),
+                                                   mkU8(2))))));
+                     putWReg(wd, /* divide by 0x47000000 == 2^15 in every lane */
+                             triop(Iop_Div32Fx4, rm,
+                                   binop(Iop_64HLtoV128,
+                                         mkexpr(t3), mkexpr(t2)),
+                                   binop(Iop_64HLtoV128,
+                                         mkU64(0x4700000047000000),
+                                         mkU64(0x4700000047000000))));
+                     break;
+                  }
+
+               case 0x01: { /* FFQR.D */
+                     DIP("FFQR.D w%d, w%d", wd, ws);
+                     calculateMSACSR(ws, wt, FFQRD, 1);
+                     t1 = newTemp(Ity_V128);
+                     t2 = newTemp(Ity_I64);
+                     t3 = newTemp(Ity_I64);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     assign(t1, /* t1: 32-bit lanes sign-extended to 64 bits */
+                            binop(Iop_SarN64x2,
+                                  binop(Iop_InterleaveLO32x4,
+                                        getWReg(ws),
+                                        getWReg(ws)),
+                                  mkU8(32)));
+                     assign(t2, /* t2: low lane of t1 converted to F64 */
+                            unop(Iop_ReinterpF64asI64,
+                                 binop(Iop_I64StoF64, rm,
+                                       unop(Iop_V128to64,
+                                            mkexpr(t1)))));
+                     assign(t3, /* t3: high lane of t1 converted to F64 */
+                            unop(Iop_ReinterpF64asI64,
+                                 binop(Iop_I64StoF64, rm,
+                                       unop(Iop_V128HIto64,
+                                            mkexpr(t1)))));
+                     putWReg(wd, /* divide by 0x41E0000000000000 == 2^31 */
+                             triop(Iop_Div64Fx2, rm,
+                                   binop(Iop_64HLtoV128,
+                                         mkexpr(t3), mkexpr(t2)),
+                                   binop(Iop_64HLtoV128,
+                                         mkU64(0x41E0000000000000),
+                                         mkU64(0x41E0000000000000))));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x19C: { /* FTINT_S.df */
+            /* Convert each floating-point lane of ws for storage in wd,
+             * using the rounding mode from get_IR_roundingmode_MSA().
+             * The source is first sanitised into t3: lanes that compare
+             * unordered with themselves (presumably NaN) are zeroed, and
+             * every lane is raised to at least -2^31 (.W) or -2^63 (.D)
+             * through a lane-wise maximum.  Each lane of t3 is then passed
+             * through Iop_RoundF32toInt / Iop_RoundF64toInt and its bits
+             * are written to wd.
+             * NOTE(review): the integer result relies on how the backend
+             * implements Iop_Round*toInt -- confirm.
+             * Any df other than 0 (.W) or 1 (.D) returns -1. */
+            switch (df) {
+               case 0x00: { /* FTINT_S.W */
+                     DIP("FTINT_S.W w%d, w%d", wd, ws);
+                     calculateMSACSR(ws, wd, FTINT_SW, 1);
+                     t1 = newTemp(Ity_I64);
+                     t2 = newTemp(Ity_I64);
+                     t3 = newTemp(Ity_V128);
+                     /* t3 = ~unordered(ws, ws) & max(ws, -2^31) */
+                     assign(t3,
+                         binop(Iop_AndV128,
+                               unop(Iop_NotV128,
+                                    binop(Iop_CmpUN32Fx4,
+                                          getWReg(ws),
+                                          getWReg(ws))),
+                               binop(Iop_Max32Fx4,
+                                     getWReg(ws),
+                                     binop(Iop_64HLtoV128,
+                                        mkU64(0xCF000000CF000000),
+                                        mkU64(0xCF000000CF000000)))));
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     /* t1 <- converted lanes 1 and 0 */
+                     assign(t1,
+                         binop(Iop_32HLto64,
+                               unop(Iop_ReinterpF32asI32,
+                                    binop(Iop_RoundF32toInt, rm,
+                                          unop(Iop_ReinterpI32asF32,
+                                               binop(Iop_GetElem32x4,
+                                                     mkexpr(t3),
+                                                     mkU8(1))))),
+                               unop(Iop_ReinterpF32asI32,
+                                    binop(Iop_RoundF32toInt, rm,
+                                          unop(Iop_ReinterpI32asF32,
+                                               binop(Iop_GetElem32x4,
+                                                     mkexpr(t3),
+                                                     mkU8(0)))))));
+                     /* t2 <- converted lanes 3 and 2 */
+                     assign(t2,
+                         binop(Iop_32HLto64,
+                               unop(Iop_ReinterpF32asI32,
+                                    binop(Iop_RoundF32toInt, rm,
+                                          unop(Iop_ReinterpI32asF32,
+                                               binop(Iop_GetElem32x4,
+                                                     mkexpr(t3),
+                                                     mkU8(3))))),
+                               unop(Iop_ReinterpF32asI32,
+                                    binop(Iop_RoundF32toInt, rm,
+                                          unop(Iop_ReinterpI32asF32,
+                                               binop(Iop_GetElem32x4,
+                                                     mkexpr(t3),
+                                                     mkU8(2)))))));
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   mkexpr(t2), mkexpr(t1)));
+                     break;
+                  }
+
+               case 0x01: {  /* FTINT_S.D */
+                     DIP("FTINT_S.D w%d, w%d", wd, ws);
+                     calculateMSACSR(ws, wd, FTINT_SD, 1);
+                     t1 = newTemp(Ity_I64);
+                     t2 = newTemp(Ity_I64);
+                     t3 = newTemp(Ity_V128);
+                     /* t3 = ~unordered(ws, ws) & max(ws, -2^63) */
+                     assign(t3,
+                         binop(Iop_AndV128,
+                               unop(Iop_NotV128,
+                                    binop(Iop_CmpUN64Fx2,
+                                          getWReg(ws),
+                                          getWReg(ws))),
+                               binop(Iop_Max64Fx2,
+                                     getWReg(ws),
+                                     binop(Iop_64HLtoV128,
+                                        mkU64(0xC3E0000000000000),
+                                        mkU64(0xC3E0000000000000)))));
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     /* t1 <- converted low lane */
+                     assign(t1,
+                            unop(Iop_ReinterpF64asI64,
+                                 binop(Iop_RoundF64toInt, rm,
+                                       unop(Iop_ReinterpI64asF64,
+                                            unop(Iop_V128to64,
+                                                 mkexpr(t3))))));
+                     /* t2 <- converted high lane */
+                     assign(t2,
+                            unop(Iop_ReinterpF64asI64,
+                                 binop(Iop_RoundF64toInt, rm,
+                                       unop(Iop_ReinterpI64asF64,
+                                            unop(Iop_V128HIto64,
+                                                 mkexpr(t3))))));
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   mkexpr(t2), mkexpr(t1)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x19D: {/* FTINT_U.df */
+            switch (df) { /* FTINT_U.W */
+               case 0x00: {
+                     DIP("FTINT_U.W w%d, w%d", wd, ws);
+                     calculateMSACSR(ws, wd, FTINT_UW, 1);
+                     t1 = newTemp(Ity_I64);
+                     t2 = newTemp(Ity_I64);
+                     t3 = newTemp(Ity_V128);
+                     t4 = newTemp(Ity_V128);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     assign(t1,
+                         binop(Iop_32HLto64,
+                               unop(Iop_ReinterpF32asI32,
+                                    binop(Iop_RoundF32toInt, rm,
+                                          unop(Iop_ReinterpI32asF32,
+                                               binop(Iop_GetElem32x4,
+                                                     getWReg(ws),
+                                                     mkU8(1))))),
+                               unop(Iop_ReinterpF32asI32,
+                                    binop(Iop_RoundF32toInt, rm,
+                                          unop(Iop_ReinterpI32asF32,
+                                               binop(Iop_GetElem32x4,
+                                                     getWReg(ws),
+                                                     mkU8(0)))))));
+                     assign(t2,
+                         binop(Iop_32HLto64,
+                               unop(Iop_ReinterpF32asI32,
+                                    binop(Iop_RoundF32toInt, rm,
+                                          unop(Iop_ReinterpI32asF32,
+                                               binop(Iop_GetElem32x4,
+                                                     getWReg(ws),
+                                                     mkU8(3))))),
+                               unop(Iop_ReinterpF32asI32,
+                                    binop(Iop_RoundF32toInt, rm,
+                                          unop(Iop_ReinterpI32asF32,
+                                               binop(Iop_GetElem32x4,
+                                                     getWReg(ws),
+                                                     mkU8(2)))))));
+                     assign(t3,
+                            unop(Iop_NotV128,
+                                 binop(Iop_SarN32x4,
+                                       getWReg(ws),
+                                       mkU8(31))));
+                     assign(t4,
+                            binop(Iop_CmpLT32Fx4,
+                                  getWReg(ws),
+                                  binop(Iop_64HLtoV128,
+                                        mkU64(0x4EFFFFFF4EFFFFFF),
+                                        mkU64(0x4EFFFFFF4EFFFFFF))));
+                     putWReg(wd,
+                             binop(Iop_OrV128,
+                                   binop(Iop_AndV128,
+                                         mkexpr(t4),
+                                         binop(Iop_AndV128,
+                                               binop(Iop_64HLtoV128,
+                                                     mkexpr(t2),
+                                                     mkexpr(t1)),
+                                               mkexpr(t3))),
+                                   binop(Iop_AndV128,
+                                         unop(Iop_NotV128, mkexpr(t4)),
+                                         unop(Iop_FtoI32Ux4_RZ,
+                                              getWReg(ws)))));
+                     break;
+                  }
+
+               case 0x01: {  /* FTINT_U.D */
+                     DIP("FTINT_U.D w%d, w%d", wd, ws);
+                     calculateMSACSR(ws, wd, FTINT_UD, 1);
+                     t1 = newTemp(Ity_I64);
+                     t2 = newTemp(Ity_I64);
+                     IRExpr *rm = get_IR_roundingmode_MSA();
+                     assign(t1,
+                            binop(Iop_F64toI64U, rm,
+                                  unop(Iop_ReinterpI64asF64,
+                                       unop(Iop_V128to64,
+                                            getWReg(ws)))));
+                     assign(t2,
+                            binop(Iop_F64toI64U, rm,
+                                  unop(Iop_ReinterpI64asF64,
+                                       unop(Iop_V128HIto64,
+                                            getWReg(ws)))));
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   mkexpr(t2), mkexpr(t1)));
+                     break;
+                  }
+
+               default:
+                  return -1;
+            }
+
+            break;
+         }
+
+      case 0x19E: { /* FFINT_S.df */
+            t1 = newTemp(Ity_V128);
+            assign(t1, getWReg(ws));
+            IRExpr *rm = get_IR_roundingmode_MSA();
+
+            switch (df) {
+               case 0x00: { /* FFINT_S.W */
+                     DIP("FFINT_S.W w%d, w%d", wd, ws);
+                     calculateMSACSR(ws, wt, FFINTSW, 1);
+                     IRTemp tmp[4];
+                     Int i;
+
+                     for (i = 0; i < 4; i++) {
+                        tmp[i] = newTemp(Ity_F32);
+                        assign(tmp[i],
+                               binop(Iop_I32StoF32, rm,
+                                     binop(Iop_GetElem32x4,
+                                           mkexpr(t1), mkU8(i))));
+                     }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   binop(Iop_32HLto64,
+                                         unop(Iop_ReinterpF32asI32,
+                                              mkexpr(tmp[3])),
+                                         unop(Iop_ReinterpF32asI32,
+                                              mkexpr(tmp[2]))),
+                                   binop(Iop_32HLto64,
+                                         unop(Iop_ReinterpF32asI32,
+                                              mkexpr(tmp[1])),
+                                         unop(Iop_ReinterpF32asI32,
+                                              mkexpr(tmp[0])))));
+                     break;
+                  }
 
-                        putIReg(rt, binop(Iop_Or32,
-                                          mkexpr(t1),
-                                          binop(Iop_And32,
-                                                getIReg(rs), mkexpr(t2))));
+               case 0x01: { /* FFINT_S.D */
+                     DIP("FFINT_S.D w%d, w%d", wd, ws);
+                     calculateMSACSR(ws, wt, FFINTSD, 1);
+                     IRTemp tmp[2];
+                     Int i;
+
+                     for (i = 0; i < 2; i++) {
+                        tmp[i] = newTemp(Ity_F64);
+                        assign(tmp[i],
+                               binop(Iop_I64StoF64, rm,
+                                     binop(Iop_GetElem64x2,
+                                           mkexpr(t1), mkU8(i))));
                      }
+
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   unop(Iop_ReinterpF64asI64,
+                                        mkexpr(tmp[1])),
+                                   unop(Iop_ReinterpF64asI64,
+                                        mkexpr(tmp[0]))));
                      break;
                   }
-                  case 0x1: {  /* PREPEND */
-                     DIP("prepend r%u, r%u, %u", rt, rs, rd);
-                     vassert(!mode64);
-                     t1 = newTemp(Ity_I32);
-                     t2 = newTemp(Ity_I32);
-                     t3 = newTemp(Ity_I32);
 
-                     if (0 != rd) {
-                        assign(t1, binop(Iop_Shr32, getIReg(rt), mkU8(rd)));
+               default:
+                  return -1;
+            }
 
-                        if (31 == rd) {
-                           putIReg(rt, binop(Iop_Or32,
-                                             mkexpr(t1),
-                                             binop(Iop_Shl32,
-                                                   binop(Iop_And32,
-                                                         getIReg(rs),
-                                                         mkU32(0x7fffffff)),
-                                                   mkU8(1))));
-                        } else if (1 == rd) {
-                           putIReg(rt, binop(Iop_Or32,
-                                             mkexpr(t1),
-                                             binop(Iop_Shl32,
-                                                   binop(Iop_And32,
-                                                         getIReg(rs),
-                                                         mkU32(0x1)),
-                                                   mkU8(31))));
-                        } else {
-                           assign(t2, binop(Iop_Add32, mkU32(rd), mkU32(0x1)));
+            break;
+         }
 
-                           assign(t3, unop(Iop_Not32,
-                                           binop(Iop_Shl32,
-                                                 mkU32(0xffffffff),
-                                                 unop(Iop_32to8, mkexpr(t2)))));
+      case 0x19F: { /* FFINT_U.df */
+            IRExpr *rm = get_IR_roundingmode_MSA();
 
-                           putIReg(rt, binop(Iop_Or32,
-                                             mkexpr(t1),
-                                             binop(Iop_Shl32,
-                                                   binop(Iop_And32,
-                                                         getIReg(rs),
-                                                         mkexpr(t3)),
-                                                   mkU8(32-rd))));
-                        }
-                     }
+            switch (df) {
+               case 0x00: { /* FFINT_U.W */
+                     DIP("FFINT_U.W w%d, w%d", wd, ws);
+                     calculateMSACSR(ws, wt, FFINT_UW, 1);
+                     putWReg(wd, unop(Iop_I32UtoFx4, getWReg(ws)));
                      break;
                   }
-                  case 0x10: {  /* BALIGN */
-                     DIP("balign r%u, r%u, %u", rt, rs, rd);
-                     vassert(!mode64);
-                     t1 = newTemp(Ity_I32);
-                     t2 = newTemp(Ity_I32);
-                     t3 = newTemp(Ity_I32);
 
-                     if ((2 != rd) && (0 != rd)) {
-                        assign(t1, binop(Iop_Shl32,
-                                         binop(Iop_And32,
-                                               mkU32(rd), mkU32(0x3)),
-                                         mkU8(0x3)));
-                        assign(t2, binop(Iop_Shl32,
-                                         getIReg(rt),
-                                         unop(Iop_32to8, mkexpr(t1))));
-                        assign(t3, binop(Iop_Shr32,
-                                         getIReg(rs),
-                                         unop(Iop_32to8,
-                                              binop(Iop_Shl32,
-                                                    binop(Iop_Sub32,
-                                                          mkU32(0x4),
-                                                          binop(Iop_And32,
-                                                                mkU32(rd),
-                                                                mkU32(0x3))),
-                                                    mkU8(0x3)))));
-                        putIReg(rt, binop(Iop_Or32, mkexpr(t2), mkexpr(t3)));
-                     }
+               case 0x01: { /* FFINT_U.D */
+                     DIP("FFINT_U.D w%d, w%d",
+                         wd, ws);
+                     calculateMSACSR(ws, wt,
+                                     FFINT_UD, 1);
+                     t1 = newTemp(Ity_I64);
+                     t2 = newTemp(Ity_I64);
+                     assign(t1,
+                            unop(Iop_ReinterpF64asI64,
+                                 binop(Iop_I64UtoF64, rm,
+                                       unop(Iop_V128to64,
+                                            getWReg(ws)))));
+                     assign(t2,
+                            unop(Iop_ReinterpF64asI64,
+                                 binop(Iop_I64UtoF64, rm,
+                                       unop(Iop_V128HIto64,
+                                            getWReg(ws)))));
+                     putWReg(wd,
+                             binop(Iop_64HLtoV128,
+                                   mkexpr(t2), mkexpr(t1)));
                      break;
                   }
-                  default:
-                     return -1;
-               }
-               break;  /* end of APPEND */
+
+               default:
+                  return -1;
             }
-            default:
-               return -1;
+
+            break;
          }
-         break;
-      }
+
       default:
-            return -1;
+         return -1;
+   }
+
+   return 0;
+}
+
+static Int msa_MI10_load(UInt cins, UChar wd, UChar ws) { /* MI10 (0x20) */
+   IRTemp t1;   /* presumably set by LOAD_STORE_PATTERN_MSA - TODO confirm */
+   UShort i10;
+   UChar df;
+   /* i10 = instruction bits 25..16 (immediate), df = bits 1..0 (format). */
+   i10 = (cins & 0x03FF0000) >> 16;
+   df = cins & 0x00000003;
+   /* Trace text is "offset(base)": the raw i10 field first, then ws. */
+   switch (df) {
+      case 0x00: { /* LD.B */
+            DIP("LD.B w%d, %d(r%d)", wd, i10, ws);
+            LOAD_STORE_PATTERN_MSA(i10);
+            putWReg(wd, load(Ity_V128, mkexpr(t1)));
+            break;
+         }
+      /* H/W/D hand the macro the immediate shifted left by 1/2/3. */
+      case 0x01: { /* LD.H */
+            DIP("LD.H w%d, %d(r%d)", wd, i10, ws);
+            LOAD_STORE_PATTERN_MSA(i10 << 1);
+#if defined (_MIPSEL)
+            putWReg(wd, load(Ity_V128, mkexpr(t1)));
+#elif defined (_MIPSEB) /* big-endian: reverse bytes in each 16-bit lane */
+            putWReg(wd,
+                    unop(Iop_Reverse8sIn16_x8,
+                         load(Ity_V128, mkexpr(t1))));
+#endif
+            break;
+         }
+
+      case 0x02: { /* LD.W */
+            DIP("LD.W w%d, %d(r%d)", wd, i10, ws);
+            LOAD_STORE_PATTERN_MSA(i10 << 2);
+#if defined (_MIPSEL)
+            putWReg(wd, load(Ity_V128, mkexpr(t1)));
+#elif defined (_MIPSEB) /* big-endian: reverse bytes in each 32-bit lane */
+            putWReg(wd,
+                    unop(Iop_Reverse8sIn32_x4,
+                         load(Ity_V128, mkexpr(t1))));
+#endif
+            break;
+         }
+
+      case 0x03: { /* LD.D */
+            DIP("LD.D w%d, %d(r%d)", wd, i10, ws);
+            LOAD_STORE_PATTERN_MSA(i10 << 3);
+#if defined (_MIPSEL)
+            putWReg(wd, load(Ity_V128, mkexpr(t1)));
+#elif defined (_MIPSEB) /* big-endian: reverse bytes in each 64-bit lane */
+            putWReg(wd,
+                    unop(Iop_Reverse8sIn64_x2,
+                         load(Ity_V128, mkexpr(t1))));
+#endif
+            break;
+         }
+
+      default:
+         return -1;
+   }
+
+   return 0;
+}
+
+static Int msa_MI10_store(UInt cins, UChar wd, UChar ws) { /* MI10 (0x24) */
+   IRTemp t1;   /* presumably set by LOAD_STORE_PATTERN_MSA - TODO confirm */
+   UShort i10;
+   UChar df;
+   /* df = instruction bits 1..0 (format), i10 = bits 25..16 (immediate). */
+   df = cins & 0x00000003;
+   i10 = (cins & 0x03FF0000) >> 16;
+   /* Trace text is "offset(base)": the raw i10 field first, then ws. */
+   switch (df) {
+      case 0x00: { /* ST.B */
+            DIP("ST.B w%d, %d(r%d)", wd, i10, ws);
+            LOAD_STORE_PATTERN_MSA(i10);
+            store(mkexpr(t1), getWReg(wd));
+            break;
+         }
+      /* H/W/D hand the macro the immediate shifted left by 1/2/3. */
+      case 0x01: { /* ST.H */
+            DIP("ST.H w%d, %d(r%d)", wd, i10, ws);
+            LOAD_STORE_PATTERN_MSA(i10 << 1);
+#if defined (_MIPSEL)
+            store(mkexpr(t1), getWReg(wd));
+#elif defined (_MIPSEB) /* big-endian: reverse bytes in each 16-bit lane */
+            store(mkexpr(t1),
+                  unop(Iop_Reverse8sIn16_x8, getWReg(wd)));
+#endif
+            break;
+         }
+
+      case 0x02: { /* ST.W */
+            DIP("ST.W w%d, %d(r%d)", wd, i10, ws);
+            LOAD_STORE_PATTERN_MSA(i10 << 2);
+#if defined (_MIPSEL)
+            store(mkexpr(t1), getWReg(wd));
+#elif defined (_MIPSEB) /* big-endian: reverse bytes in each 32-bit lane */
+            store(mkexpr(t1),
+                  unop(Iop_Reverse8sIn32_x4, getWReg(wd)));
+#endif
+            break;
+         }
+
+      case 0x03: { /* ST.D */
+            DIP("ST.D w%d, %d(r%d)", wd, i10, ws);
+            LOAD_STORE_PATTERN_MSA(i10 << 3);
+#if defined (_MIPSEL)
+            store(mkexpr(t1), getWReg(wd));
+#elif defined (_MIPSEB) /* big-endian: reverse bytes in each 64-bit lane */
+            store(mkexpr(t1),
+                  unop(Iop_Reverse8sIn64_x2, getWReg(wd)));
+#endif
+            break;
+         }
+
+      default:
+         return -1;
    }
+
    return 0;
 }
 
+/*------------------------------------------------------------*/
+/*---   Disassemble a single MIPS MSA (SIMD) instruction   ---*/
+/*---   Return values:                                     ---*/
+/*---       0: Success                                     ---*/
+/*---      -1: Decode failure (unknown instruction)        ---*/
+/*---      -2: Illegal instruction                         ---*/
+/*------------------------------------------------------------*/
+static Int disMSAInstr_MIPS_WRK ( UInt cins ) {
+   UChar minor_opcode, wd, ws;
+
+   vassert(has_msa);
+   vassert((cins & 0xFC000000) == 0x78000000); /* bits 31..26 == 0x1E */
+   /* Bit 5 set: keep only bits 5..2 (msa_MI10_* read bits 1..0 as df). */
+   minor_opcode = (cins & 0x20) > 0 ? (cins & 0x3C) : (cins & 0x3F);
+   wd = (cins & 0x000007C0) >> 6;   /* bits 10..6 */
+   ws = (cins & 0x0000F800) >> 11;  /* bits 15..11 */
+   /* Each helper's Int result is returned to the caller unchanged. */
+   switch (minor_opcode) {
+      case 0x0:
+         return msa_I8_logical(cins, wd, ws);
+
+      case 0x01:
+         return msa_I8_branch(cins, wd, ws);
+
+      case 0x02:
+         return msa_I8_shift(cins, wd, ws);
+
+      case 0x06:
+         return msa_I5_06(cins, wd, ws);
+
+      case 0x07:
+         return msa_I5_07(cins, wd, ws);
+
+      case 0x09:
+         return msa_BIT_09(cins, wd, ws);
+
+      case 0x0A:
+         return msa_BIT_0A(cins, wd, ws);
+
+      case 0x0D:
+         return msa_3R_0D(cins, wd, ws);
+
+      case 0x0E:
+         return msa_3R_0E(cins, wd, ws);
+
+      case 0x0F:
+         return msa_3R_0F(cins, wd, ws);
+
+      case 0x10:
+         return msa_3R_10(cins, wd, ws);
+
+      case 0x11:
+         return msa_3R_11(cins, wd, ws);
+
+      case 0x12:
+         return msa_3R_12(cins, wd, ws);
+
+      case 0x13:
+         return msa_3R_13(cins, wd, ws);
+
+      case 0x14:
+         return msa_3R_14(cins, wd, ws);
+
+      case 0x15:
+         return msa_3R_15(cins, wd, ws);
+
+      case 0x19:
+         return msa_ELM(cins, wd, ws);
+
+      case 0x1A:
+         return msa_3R_1A(cins, wd, ws);
+
+      case 0x1B:
+         return msa_3R_1B(cins, wd, ws);
+
+      case 0x1C:
+         return msa_3R_1C(cins, wd, ws);
+
+      case 0x1E:
+         if ((cins & 0x03000000) == 0)        /* bits 25..24 both clear */
+            return msa_VEC(cins, wd, ws);
+         else if ((cins & 0x00200000) == 0)   /* bit 21 clear */
+            return msa_2R(cins, wd, ws);
+         else
+            return msa_2RF(cins, wd, ws);
+
+      case 0x20:
+         return msa_MI10_load(cins, wd, ws);
+
+      case 0x24:
+         return msa_MI10_store(cins, wd, ws);
+   }
+   /* No minor opcode matched. */
+   return -1;
+}
+
 /*------------------------------------------------------------*/
 /*---          Disassemble a single instruction            ---*/
 /*------------------------------------------------------------*/
@@ -12380,6 +26066,129 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
             break;
          } else
             goto decode_failure;
+      } else if (fmt >= 0x1c && has_msa) { /* BNZ.df */
+         Int df = fmt & 3;
+         t0 = newTemp(Ity_I32);
+         t1 = newTemp(Ity_V128);
+         t2 = newTemp(Ity_V128);
+         t3 = newTemp(Ity_V128);
+         assign(t1, getWReg(ft));
+         assign(t2, binop(Iop_64HLtoV128, mkU64(0), mkU64(0)));
+
+         switch (df) {
+            case 0x00: { /* BNZ.B */
+                  DIP("BNZ.B w%d, %d", ft, imm);
+                  assign(t3, binop(Iop_CmpEQ8x16, mkexpr(t1), mkexpr(t2)));
+                  break;
+               }
+
+            case 0x01: { /* BNZ.H */
+                  DIP("BNZ.H w%d, %d", ft, imm);
+                  assign(t3, binop(Iop_CmpEQ16x8, mkexpr(t1), mkexpr(t2)));
+                  break;
+               }
+
+            case 0x02: { /* BNZ.W */
+                  DIP("BNZ.W w%d, %d", ft, imm);
+                  assign(t3, binop(Iop_CmpEQ32x4, mkexpr(t1), mkexpr(t2)));
+                  break;
+               }
+
+            case 0x03: { /* BNZ.D */
+                  DIP("BNZ.D w%d, %d", ft, imm);
+                  assign(t3, binop(Iop_CmpEQ64x2, mkexpr(t1), mkexpr(t2)));
+                  break;
+               }
+         }
+
+         assign(t0,
+                binop(Iop_Or32,
+                      binop(Iop_Or32,
+                            unop(Iop_V128to32, mkexpr(t3)),
+                            unop(Iop_64HIto32, unop(Iop_V128to64, mkexpr(t3)))),
+                      binop(Iop_Or32,
+                            unop(Iop_64to32,
+                                 unop(Iop_V128HIto64, mkexpr(t3))),
+                            unop(Iop_64HIto32,
+                                 unop(Iop_V128HIto64, mkexpr(t3))))));
+         dis_branch(False,
+                    binop(Iop_CmpEQ32, mkexpr(t0), mkU32(0)), imm, &bstmt);
+      } else if (fmt == 0x0F && has_msa) { /* BNZ.V */
+         t0 = newTemp(Ity_I32);
+         t1 = newTemp(Ity_V128);
+         assign(t1, getWReg(ft));
+         assign(t0,
+                binop(Iop_Or32,
+                      binop(Iop_Or32,
+                            unop(Iop_V128to32, mkexpr(t1)),
+                            unop(Iop_64HIto32, unop(Iop_V128to64, mkexpr(t1)))),
+                      binop(Iop_Or32,
+                            unop(Iop_64to32, unop(Iop_V128HIto64, mkexpr(t1))),
+                            unop(Iop_64HIto32,
+                                 unop(Iop_V128HIto64, mkexpr(t1))))));
+         dis_branch(False,
+                    binop(Iop_CmpNE32, mkexpr(t0), mkU32(0)), imm, &bstmt);
+      } else if (fmt >= 0x18 && has_msa) { /* BZ.df */
+         Int df = fmt & 3;
+         t0 = newTemp(Ity_I32);
+         t1 = newTemp(Ity_V128);
+         t2 = newTemp(Ity_V128);
+         t3 = newTemp(Ity_V128);
+         assign(t1, getWReg(ft));
+         assign(t2, binop(Iop_64HLtoV128, mkU64(0), mkU64(0)));
+
+         switch (df) {
+            case 0x00: { /* BZ.B */
+                  DIP("BZ.B w%d, %d", ft, imm);
+                  assign(t3, binop(Iop_CmpEQ8x16, mkexpr(t1), mkexpr(t2)));
+                  break;
+               }
+
+            case 0x01: { /* BZ.H */
+                  DIP("BZ.H w%d, %d", ft, imm);
+                  assign(t3, binop(Iop_CmpEQ16x8, mkexpr(t1), mkexpr(t2)));
+                  break;
+               }
+
+            case 0x02: { /* BZ.W */
+                  DIP("BZ.W w%d, %d", ft, imm);
+                  assign(t3, binop(Iop_CmpEQ32x4, mkexpr(t1), mkexpr(t2)));
+                  break;
+               }
+
+            case 0x03: { /* BZ.D */
+                  DIP("BZ.D w%d, %d", ft, imm);
+                  assign(t3, binop(Iop_CmpEQ64x2, mkexpr(t1), mkexpr(t2)));
+                  break;
+               }
+         }
+
+         assign(t0,
+                binop(Iop_Or32,
+                      binop(Iop_Or32,
+                            unop(Iop_V128to32, mkexpr(t3)),
+                            unop(Iop_64HIto32, unop(Iop_V128to64, mkexpr(t3)))),
+                      binop(Iop_Or32,
+                            unop(Iop_64to32, unop(Iop_V128HIto64, mkexpr(t3))),
+                            unop(Iop_64HIto32,
+                                 unop(Iop_V128HIto64, mkexpr(t3))))));
+         dis_branch(False,
+                    binop(Iop_CmpNE32, mkexpr(t0), mkU32(0)), imm, &bstmt);
+      } else if (fmt == 0x0B && has_msa) { /* BZ.V */
+         t0 = newTemp(Ity_I32);
+         t1 = newTemp(Ity_V128);
+         assign(t1, getWReg(ft));
+         assign(t0,
+                binop(Iop_Or32,
+                      binop(Iop_Or32,
+                            unop(Iop_V128to32, mkexpr(t1)),
+                            unop(Iop_64HIto32, unop(Iop_V128to64, mkexpr(t1)))),
+                      binop(Iop_Or32,
+                            unop(Iop_64to32, unop(Iop_V128HIto64, mkexpr(t1))),
+                            unop(Iop_64HIto32,
+                                 unop(Iop_V128HIto64, mkexpr(t1))))));
+         dis_branch(False,
+                    binop(Iop_CmpEQ32, mkexpr(t0), mkU32(0)), imm, &bstmt);
       } else {
          switch (function) {
             case 0x4: {  /* SQRT.fmt */
@@ -13676,47 +27485,45 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
          DIP("madd.s f%u, f%u, f%u, f%u", fd, fmt, fs, ft);
          IRExpr *rm = get_IR_roundingmode();
          t1 = newTemp(Ity_F32);
-         assign(t1, qop(Iop_MAddF32, rm,
-                        getLoFromF64(tyF, getFReg(fmt)),
-                        getLoFromF64(tyF, getFReg(fs)),
-                        getLoFromF64(tyF, getFReg(ft))));
+         assign(t1, triop(Iop_AddF32, rm, getLoFromF64(tyF, getFReg(fmt)),
+            triop(Iop_MulF32, rm, getLoFromF64(tyF, getFReg(fs)),
+               getLoFromF64(tyF, getFReg(ft)))));
          putFReg(fd, mkWidenFromF32(tyF, mkexpr(t1)));
          break;  /* MADD.S */
       }
       case 0x21: {  /* MADD.D */
          DIP("madd.d f%u, f%u, f%u, f%u", fd, fmt, fs, ft);
          IRExpr *rm = get_IR_roundingmode();
-         putDReg(fd, qop(Iop_MAddF64, rm, getDReg(fmt), getDReg(fs),
-                         getDReg(ft)));
+         putDReg(fd, triop(Iop_AddF64, rm, getDReg(fmt),
+            triop(Iop_MulF64, rm, getDReg(fs),
+                         getDReg(ft))));
          break;  /* MADD.D */
       }
       case 0x28: {  /* MSUB.S */
          DIP("msub.s f%u, f%u, f%u, f%u", fd, fmt, fs, ft);
          IRExpr *rm = get_IR_roundingmode();
          t1 = newTemp(Ity_F32);
-         assign(t1, qop(Iop_MSubF32, rm,
-                        getLoFromF64(tyF, getFReg(fmt)),
-                        getLoFromF64(tyF, getFReg(fs)),
-                        getLoFromF64(tyF, getFReg(ft))));
+         assign(t1, triop(Iop_SubF32, rm,
+            triop(Iop_MulF32, rm, getLoFromF64(tyF, getFReg(fs)),
+               getLoFromF64(tyF, getFReg(ft))),
+             getLoFromF64(tyF, getFReg(fmt))));
          putFReg(fd, mkWidenFromF32(tyF, mkexpr(t1)));
          break;  /* MSUB.S */
       }
       case 0x29: {  /* MSUB.D */
          DIP("msub.d f%u, f%u, f%u, f%u", fd, fmt, fs, ft);
          IRExpr *rm = get_IR_roundingmode();
-         putDReg(fd, qop(Iop_MSubF64, rm, getDReg(fmt), getDReg(fs),
-                         getDReg(ft)));
+         putDReg(fd, triop(Iop_SubF64, rm, triop(Iop_MulF64, rm, getDReg(fs),
+                         getDReg(ft)), getDReg(fmt)));
          break;  /* MSUB.D */
       }
       case 0x30: {  /* NMADD.S */
          DIP("nmadd.s f%u, f%u, f%u, f%u", fd, fmt, fs, ft);
          IRExpr *rm = get_IR_roundingmode();
          t1 = newTemp(Ity_F32);
-         assign(t1, qop(Iop_MAddF32, rm,
-                        getLoFromF64(tyF, getFReg(fmt)),
-                        getLoFromF64(tyF, getFReg(fs)),
-                        getLoFromF64(tyF, getFReg(ft))));
-
+         assign(t1, triop(Iop_AddF32, rm, getLoFromF64(tyF, getFReg(fmt)),
+            triop(Iop_MulF32, rm, getLoFromF64(tyF, getFReg(fs)),
+               getLoFromF64(tyF, getFReg(ft)))));
          putFReg(fd, mkWidenFromF32(tyF, unop(Iop_NegF32, mkexpr(t1))));
          break;  /* NMADD.S */
       }
@@ -13724,8 +27531,9 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
          DIP("nmadd.d f%u, f%u, f%u, f%u", fd, fmt, fs, ft);
          IRExpr *rm = get_IR_roundingmode();
          t1 = newTemp(Ity_F64);
-         assign(t1, qop(Iop_MAddF64, rm, getDReg(fmt), getDReg(fs),
-                        getDReg(ft)));
+         assign(t1, triop(Iop_AddF64, rm, getDReg(fmt),
+            triop(Iop_MulF64, rm, getDReg(fs),
+                         getDReg(ft))));
          putDReg(fd, unop(Iop_NegF64, mkexpr(t1)));
          break;  /* NMADD.D */
       }
@@ -13733,11 +27541,10 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
          DIP("nmsub.s f%u, f%u, f%u, f%u", fd, fmt, fs, ft);
          IRExpr *rm = get_IR_roundingmode();
          t1 = newTemp(Ity_F32);
-         assign(t1, qop(Iop_MSubF32, rm,
-                        getLoFromF64(tyF, getFReg(fmt)),
-                        getLoFromF64(tyF, getFReg(fs)),
-                        getLoFromF64(tyF, getFReg(ft))));
-
+         assign(t1, triop(Iop_SubF32, rm,
+            triop(Iop_MulF32, rm, getLoFromF64(tyF, getFReg(fs)),
+               getLoFromF64(tyF, getFReg(ft))),
+             getLoFromF64(tyF, getFReg(fmt))));
          putFReg(fd, mkWidenFromF32(tyF, unop(Iop_NegF32, mkexpr(t1))));
          break;  /* NMSUBB.S */
       }
@@ -13745,8 +27552,8 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
          DIP("nmsub.d f%u, f%u, f%u, f%u", fd, fmt, fs, ft);
          IRExpr *rm = get_IR_roundingmode();
          t1 = newTemp(Ity_F64);
-         assign(t1, qop(Iop_MSubF64, rm, getDReg(fmt), getDReg(fs),
-                        getDReg(ft)));
+         assign(t1, triop(Iop_SubF64, rm, triop(Iop_MulF64, rm, getDReg(fs),
+                         getDReg(ft)), getDReg(fmt)));
          putDReg(fd, unop(Iop_NegF64, mkexpr(t1)));
          break;  /* NMSUBB.D */
       }
@@ -16069,6 +29876,38 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
          }
       }
 
+      case 0x05:  /* LSA */
+         if (has_msa) {
+            UInt imm2 = (imm & 0xC0) >> 6;
+            DIP("lsa r%u, r%u, r%u, imm: 0x%x", rd, rs, rt, imm2);
+            if (mode64) {
+               putIReg(rd,
+                       unop(Iop_32Sto64,
+                            binop(Iop_Add32,
+                                  binop(Iop_Shl32,
+                                        unop(Iop_64to32, getIReg(rs)),
+                                        mkU8(imm2 + 1)),
+                                  unop(Iop_64to32, getIReg(rt)))));
+            } else {
+               putIReg(rd,
+                       binop(Iop_Add32,
+                             binop(Iop_Shl32, getIReg(rs), mkU8(imm2 + 1)),
+                             getIReg(rt)));
+            }
+         } else {
+            ILLEGAL_INSTRUCTON;
+         }
+         break;
+      case 0x15:{ /* DLSA */
+         UInt imm2 = (imm & 0xC0) >> 6;
+         DIP("dlsa r%u, r%u, r%u, imm: 0x%x", rd, rs, rt, imm2);
+         putIReg(rd,
+                 binop(Iop_Add64,
+                       binop(Iop_Shl64, getIReg(rs), mkU8(imm2 + 1)),
+                       getIReg(rt)));
+         break;
+      }
+
       case 0x0D:  /* BREAK */
          DIP("break 0x%x", trap_code);
          if (mode64)
@@ -17165,6 +31004,20 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *,
          goto decode_failure;
       }
 
+   case 0x1E: /* MIPS MSA (SIMD) */
+      if (has_msa) {
+         Int retVal = disMSAInstr_MIPS_WRK(cins);
+         if (retVal == 0) {
+            break;
+         } else if (retVal == -2) {
+            ILLEGAL_INSTRUCTON
+            break;
+         }
+      }
+      vex_printf("Error occured while trying to decode MIPS MSA "
+                 "instruction.\nYour platform probably doesn't support "
+                 "MIPS MSA (SIMD) ASE.\n");
+
    default:
       goto decode_failure;
 
@@ -17296,6 +31149,7 @@ DisResult disInstr_MIPS( IRSB*        irsb_IN,
 
    mode64 = guest_arch != VexArchMIPS32;
    fp_mode64 = abiinfo->guest_mips_fp_mode64;
+   has_msa = VEX_MIPS_PROC_MSA(archinfo->hwcaps);
 
    vassert(VEX_MIPS_HOST_FP_MODE(archinfo->hwcaps) >= fp_mode64);
 
index 35a293b7227d6e04f65ee08943e7b23726b71a63..f62a410d1116d710de2299d93930b906c3a2277a 100644 (file)
@@ -91,6 +91,25 @@ const RRegUniverse* getRRegUniverse_MIPS ( Bool mode64 )
    ru->regs[ru->size++] = hregMIPS_F30(mode64);
    ru->allocable_end[(mode64) ? HRcFlt64 : HRcFlt32] = ru->size - 1;
 
+   ru->allocable_start[HRcVec128] = ru->size;
+   ru->regs[ru->size++] = hregMIPS_W16(mode64);
+   ru->regs[ru->size++] = hregMIPS_W17(mode64);
+   ru->regs[ru->size++] = hregMIPS_W18(mode64);
+   ru->regs[ru->size++] = hregMIPS_W19(mode64);
+   ru->regs[ru->size++] = hregMIPS_W20(mode64);
+   ru->regs[ru->size++] = hregMIPS_W21(mode64);
+   ru->regs[ru->size++] = hregMIPS_W22(mode64);
+   ru->regs[ru->size++] = hregMIPS_W23(mode64);
+   ru->regs[ru->size++] = hregMIPS_W24(mode64);
+   ru->regs[ru->size++] = hregMIPS_W25(mode64);
+   ru->regs[ru->size++] = hregMIPS_W26(mode64);
+   ru->regs[ru->size++] = hregMIPS_W27(mode64);
+   ru->regs[ru->size++] = hregMIPS_W28(mode64);
+   ru->regs[ru->size++] = hregMIPS_W29(mode64);
+   ru->regs[ru->size++] = hregMIPS_W30(mode64);
+   ru->regs[ru->size++] = hregMIPS_W31(mode64);
+   ru->allocable_end[HRcVec128] = ru->size - 1;
+
    if (!mode64) {
       /* Fake double floating point */
       ru->allocable_start[HRcFlt64] = ru->size;
@@ -157,6 +176,13 @@ UInt ppHRegMIPS(HReg reg, Bool mode64)
       "$d8", "$d9", "$d10", "$d11", "$d12", "$d13", "$d14", "$d15",
    };
 
+   static const HChar *fvec128_names[32]   /* indexed by hregEncoding() */
+       = { "$w0", "$w1", "$w2", "$w3", "$w4", "$w5", "$w6", "$w7",
+      "$w8", "$w9", "$w10", "$w11", "$w12", "$w13", "$w14", "$w15",
+      "$w16", "$w17", "$w18", "$w19", "$w20", "$w21", "$w22", "$w23",
+      "$w24", "$w25", "$w26", "$w27", "$w28", "$w29", "$w30", "$w31"
+   };
+
    /* Be generic for all virtual regs. */
    if (hregIsVirtual(reg)) {
       return ppHReg(reg);
@@ -164,7 +190,8 @@ UInt ppHRegMIPS(HReg reg, Bool mode64)
 
    /* But specific for real regs. */
    vassert(hregClass(reg) == HRcInt32 || hregClass(reg) == HRcInt64 ||
-           hregClass(reg) == HRcFlt32 || hregClass(reg) == HRcFlt64);
+           hregClass(reg) == HRcFlt32 || hregClass(reg) == HRcFlt64 ||
+           hregClass(reg) == HRcVec128);
 
    /* But specific for real regs. */
    switch (hregClass(reg)) {
@@ -184,6 +211,10 @@ UInt ppHRegMIPS(HReg reg, Bool mode64)
          r = hregEncoding(reg);
          vassert(r >= 0 && r < 32);
          return vex_printf("%s", freg64_names[r]);
+      case HRcVec128:
+         r = hregEncoding(reg);
+         vassert(r >= 0 && r < 32);
+         return vex_printf("%s", fvec128_names[r]);
       default:
          vpanic("ppHRegMIPS");
          break;
@@ -754,6 +785,466 @@ const HChar *showMIPSMaccOp(MIPSMaccOp op, Bool variable)
    return ret;
 }
 
+HChar showMsaDF(MSADF df) {
+   /* One-letter suffix for an MSA integer data format; '?' is returned
+      for any value that is not one of the four formats below. */
+   HChar suffix = '?';
+
+   if (df == MSA_B)
+      suffix = 'b';
+   else if (df == MSA_H)
+      suffix = 'h';
+   else if (df == MSA_W)
+      suffix = 'w';
+   else if (df == MSA_D)
+      suffix = 'd';
+
+   return suffix;
+}
+
+HChar showMsaDFF(MSADFFlx df, int op) {
+   /* mul_q, mulr_q and fexdo print the narrower of the two widths named
+      by the format ('w' for DW, 'h' for WH); every other op prints the
+      wider one.  Unknown formats yield '?'. */
+   const int narrow = (op == MSA_MUL_Q) || (op == MSA_MULR_Q)
+                      || (op == MSA_FEXDO);
+
+   if (df == MSA_F_DW)
+      return narrow ? 'w' : 'd';
+
+   if (df == MSA_F_WH)
+      return narrow ? 'h' : 'w';
+
+   return '?';
+}
+
+const HChar *showMsaMI10op(MSAMI10Op op) {
+   /* Mnemonic for an MI10-format (vector load/store) operation; any
+      other value panics. */
+   switch (op) {
+      case MSA_LD: return "ld";
+      case MSA_ST: return "st";
+      default:     vpanic("showMsaMI10op");
+   }
+}
+
+const HChar *showMsaElmOp(MSAELMOp op) {
+   /* Mnemonic for an ELM-format operation; a value without an entry
+      here panics. */
+   switch (op) {
+      case MSA_MOVE:   return "move.v";
+      case MSA_INSERT: return "insert";
+      case MSA_COPY_U: return "copy_u";
+      case MSA_COPY_S: return "copy_s";
+      case MSA_SLDI:   return "sldi";
+      case MSA_INSVE:  return "insve";
+      case MSA_CFCMSA: return "cfcmsa";
+      case MSA_CTCMSA: return "ctcmsa";
+      default:         vpanic("showMsaElmOp");
+   }
+}
+
+const HChar *showMsa2ROp(MSA2ROp op) {
+   /* Mnemonic for a 2R-format operation; a value without an entry here
+      panics. */
+   switch (op) {
+      case MSA_NLZC: return "nlzc";
+      case MSA_NLOC: return "nloc";
+      case MSA_FILL: return "fill";
+      case MSA_PCNT: return "pcnt";
+      default:       vpanic("showMsa2ROp");
+   }
+}
+
+const HChar *showMsa2RFOp(MSA2RFOp op) {
+   /* Mnemonic for a 2RF-format (two-register floating point) operation;
+      a value without an entry here panics. */
+   switch (op) {
+      case MSA_FTRUNC_S: return "ftrunc_s";
+      case MSA_FTRUNC_U: return "ftrunc_u";
+      case MSA_FFINT_S:  return "ffint_s";
+      case MSA_FFINT_U:  return "ffint_u";
+      case MSA_FSQRT:    return "fsqrt";
+      case MSA_FRSQRT:   return "frsqrt";
+      case MSA_FRCP:     return "frcp";
+      case MSA_FEXUPR:   return "fexupr";
+      case MSA_FTINT_U:  return "ftint_u";
+      case MSA_FTINT_S:  return "ftint_s";
+      case MSA_FLOG2:    return "flog2";
+      default:           vpanic("showMsa2RFOp");
+   }
+}
+
+const HChar *showMsa3ROp(MSA3ROp op) {
+   /* Mnemonic for a 3R-format (three-register integer) operation; a
+      value without an entry here panics.
+
+      PCKEV/PCKOD used to print the ILVEV/ILVOD names (copy-paste slip),
+      which made pack and interleave instructions indistinguishable in
+      debug output.  The divide mnemonics are spelled div_s/div_u to
+      match the MSA instruction names and the other _s/_u entries. */
+   switch (op) {
+      case MSA_ADDV:   return "addv";
+      case MSA_ADD_A:  return "add_a";
+      case MSA_SUBV:   return "subv";
+      case MSA_ADDS_S: return "adds_s";
+      case MSA_ADDS_U: return "adds_u";
+      case MSA_SUBS_S: return "subs_s";
+      case MSA_SUBS_U: return "subs_u";
+      case MSA_MAX_S:  return "max_s";
+      case MSA_MAX_U:  return "max_u";
+      case MSA_MIN_S:  return "min_s";
+      case MSA_MIN_U:  return "min_u";
+      case MSA_SLL:    return "sll";
+      case MSA_SRL:    return "srl";
+      case MSA_SRA:    return "sra";
+      case MSA_CEQ:    return "ceq";
+      case MSA_CLT_S:  return "clt_s";
+      case MSA_CLT_U:  return "clt_u";
+      case MSA_ILVL:   return "ilvl";
+      case MSA_ILVR:   return "ilvr";
+      case MSA_ILVEV:  return "ilvev";
+      case MSA_ILVOD:  return "ilvod";
+      case MSA_PCKEV:  return "pckev";
+      case MSA_PCKOD:  return "pckod";
+      case MSA_AVER_S: return "aver_s";
+      case MSA_AVER_U: return "aver_u";
+      case MSA_SLD:    return "sld";
+      case MSA_SPLAT:  return "splat";
+      case MSA_MULV:   return "mulv";
+      case MSA_DIVS:   return "div_s";
+      case MSA_DIVU:   return "div_u";
+      case MSA_VSHF:   return "vshf";
+      default:         vpanic("showMsa3ROp");
+   }
+}
+
+const HChar *showMsaVecOp(MSAVECOp op) {
+   /* Mnemonic for a VEC-format (whole-vector bitwise) operation; a
+      value without an entry here panics. */
+   switch (op) {
+      case MSA_ANDV: return "and.v";
+      case MSA_ORV:  return "or.v";
+      case MSA_XORV: return "xor.v";
+      case MSA_NORV: return "nor.v";
+      default:       vpanic("showMsaVecOp");
+   }
+}
+
+const HChar *showMsaBitOp(MSABITOp op) {
+   /* Mnemonic for a BIT-format (immediate shift/saturate) operation; a
+      value without an entry here panics. */
+   switch (op) {
+      case MSA_SLLI:  return "slli";
+      case MSA_SRAI:  return "srai";
+      case MSA_SRLI:  return "srli";
+      case MSA_SAT_S: return "sat_s";
+      case MSA_SRARI: return "srari";
+      default:        vpanic("showMsaBitOp");
+   }
+}
+
+const HChar *showMsa3RFOp(MSA3RFOp op) {
+   /* Mnemonic for a 3RF-format (three-register floating/fixed point)
+      operation; a value without an entry here panics. */
+   switch (op) {
+      case MSA_FADD:   return "fadd";
+      case MSA_FSUB:   return "fsub";
+      case MSA_FMUL:   return "fmul";
+      case MSA_FDIV:   return "fdiv";
+      case MSA_MUL_Q:  return "mul_q";
+      case MSA_MULR_Q: return "mulr_q";
+      case MSA_FCEQ:   return "fceq";
+      case MSA_FCLT:   return "fclt";
+      case MSA_FCUN:   return "fcun";
+      case MSA_FEXP2:  return "fexp2";
+      case MSA_FMIN:   return "fmin";
+      case MSA_FMIN_A: return "fmin_a";
+      case MSA_FMAX:   return "fmax";
+      case MSA_FMADD:  return "fmadd";
+      case MSA_FMSUB:  return "fmsub";
+      case MSA_FEXDO:  return "fexdo";
+      case MSA_FTQ:    return "ftq";
+      case MSA_FCLE:   return "fcle";
+      default:         vpanic("showMsa3RFOp");
+   }
+}
+
 MIPSInstr *MIPSInstr_LI(HReg dst, ULong imm)
 {
    MIPSInstr *i = LibVEX_Alloc_inline(sizeof(MIPSInstr));
@@ -1188,6 +1679,93 @@ MIPSInstr* MIPSInstr_ProfInc ( void ) {
    return i;
 }
 
+
+MIPSInstr* MIPSInstr_MsaMi10(MSAMI10Op op, UInt s10, HReg rs, HReg wd,
+                             MSADF df) {
+   /* Allocate an Msa_MI10 instruction and record its operands as given. */
+   MIPSInstr* insn = LibVEX_Alloc_inline(sizeof(*insn));
+
+   insn->tag = Msa_MI10;
+   insn->Min.MsaMi10.df  = df;
+   insn->Min.MsaMi10.wd  = wd;
+   insn->Min.MsaMi10.rs  = rs;
+   insn->Min.MsaMi10.s10 = s10;
+   insn->Min.MsaMi10.op  = op;
+   return insn;
+}
+
+MIPSInstr* MIPSInstr_MsaElm(MSAELMOp op, HReg ws, HReg wd, UInt dfn ) {
+   /* Allocate an Msa_ELM instruction and record its operands as given. */
+   MIPSInstr* insn = LibVEX_Alloc_inline(sizeof(*insn));
+
+   insn->tag = Msa_ELM;
+   insn->Min.MsaElm.dfn = dfn;
+   insn->Min.MsaElm.wd  = wd;
+   insn->Min.MsaElm.ws  = ws;
+   insn->Min.MsaElm.op  = op;
+   return insn;
+}
+
+MIPSInstr* MIPSInstr_Msa2R(MSA2ROp op, MSADF df, HReg ws, HReg wd ) {
+   /* Allocate an Msa_2R instruction and record its operands as given. */
+   MIPSInstr* insn = LibVEX_Alloc_inline(sizeof(*insn));
+
+   insn->tag = Msa_2R;
+   insn->Min.Msa2R.wd = wd;
+   insn->Min.Msa2R.ws = ws;
+   insn->Min.Msa2R.df = df;
+   insn->Min.Msa2R.op = op;
+   return insn;
+}
+
+MIPSInstr* MIPSInstr_Msa3R(MSA3ROp op, MSADF df, HReg wd, HReg ws, HReg wt) {
+   /* Allocate an Msa_3R instruction and record its operands as given. */
+   MIPSInstr* insn = LibVEX_Alloc_inline(sizeof(*insn));
+
+   insn->tag = Msa_3R;
+   insn->Min.Msa3R.ws = ws;
+   insn->Min.Msa3R.wt = wt;
+   insn->Min.Msa3R.wd = wd;
+   insn->Min.Msa3R.df = df;
+   insn->Min.Msa3R.op = op;
+   return insn;
+}
+
+MIPSInstr* MIPSInstr_MsaVec(MSAVECOp op, HReg wd, HReg ws, HReg wt) {
+   /* Allocate an Msa_VEC instruction and record its operands as given. */
+   MIPSInstr* insn = LibVEX_Alloc_inline(sizeof(*insn));
+
+   insn->tag = Msa_VEC;
+   insn->Min.MsaVec.ws = ws;
+   insn->Min.MsaVec.wt = wt;
+   insn->Min.MsaVec.wd = wd;
+   insn->Min.MsaVec.op = op;
+   return insn;
+}
+
+MIPSInstr* MIPSInstr_MsaBit(MSABITOp op, MSADF df, UChar ms, HReg ws, HReg wd) {
+   /* Allocate an Msa_BIT instruction and record its operands as given. */
+   MIPSInstr* insn = LibVEX_Alloc_inline(sizeof(*insn));
+
+   insn->tag = Msa_BIT;
+   insn->Min.MsaBit.ms = ms;
+   insn->Min.MsaBit.wd = wd;
+   insn->Min.MsaBit.ws = ws;
+   insn->Min.MsaBit.df = df;
+   insn->Min.MsaBit.op = op;
+   return insn;
+}
+
+MIPSInstr* MIPSInstr_Msa3RF(MSA3RFOp op, MSADFFlx df, HReg wd, HReg ws,
+                            HReg wt) {
+   /* Allocate an Msa_3RF instruction and record its operands as given. */
+   MIPSInstr* insn = LibVEX_Alloc_inline(sizeof(*insn));
+
+   insn->tag = Msa_3RF;
+   insn->Min.Msa3RF.ws = ws;
+   insn->Min.Msa3RF.wt = wt;
+   insn->Min.Msa3RF.wd = wd;
+   insn->Min.Msa3RF.df = df;
+   insn->Min.Msa3RF.op = op;
+   return insn;
+}
+
+MIPSInstr* MIPSInstr_Msa2RF(MSA2RFOp op, MSADFFlx df, HReg wd, HReg ws) {
+   /* Allocate an Msa_2RF instruction and record its operands as given. */
+   MIPSInstr *insn = LibVEX_Alloc_inline(sizeof(*insn));
+
+   insn->tag = Msa_2RF;
+   insn->Min.Msa2RF.ws = ws;
+   insn->Min.Msa2RF.wd = wd;
+   insn->Min.Msa2RF.df = df;
+   insn->Min.Msa2RF.op = op;
+   return insn;
+}
+
 /* -------- Pretty Print instructions ------------- */
 static void ppLoadImm(HReg dst, ULong imm, Bool mode64)
 {
@@ -1196,6 +1774,29 @@ static void ppLoadImm(HReg dst, ULong imm, Bool mode64)
    vex_printf(",0x%016llx", imm);
 }
 
+/* Split a combined ELM-format df/n value into its data format (*df) and
+   element index (*n).  The format is recognised from the leading bits of
+   dfn, tested from the longest prefix (doubleword) to the shortest; the
+   bits left over are the element index.  Anything that matches none of
+   the D/W/H prefixes is treated as the byte format. */
+static void MSAdfn(UInt dfn, MSADF* df, UInt* n) {
+   if ((dfn & 0x3e) == MSA_DFN_D) {
+      *df = MSA_D;
+      *n = dfn & 1;
+      return;
+   }
+
+   if ((dfn & 0x3c) == MSA_DFN_W) {
+      *df = MSA_W;
+      *n = dfn & 3;
+      return;
+   }
+
+   if ((dfn & 0x38) == MSA_DFN_H) {
+      *df = MSA_H;
+      *n = dfn & 7;
+      return;
+   }
+
+   /* A 128-bit vector holds 16 bytes, so the byte index needs four bits.
+      The previous mask of 3 truncated indices 4..15. */
+   *df = MSA_B;
+   *n = dfn & 0xf;
+}
+
 void ppMIPSInstr(const MIPSInstr * i, Bool mode64)
 {
    switch (i->tag) {
@@ -1587,6 +2188,162 @@ void ppMIPSInstr(const MIPSInstr * i, Bool mode64)
                        "addu $8, $8, $1; "
                        "sw $8, 4($9); " );
          return;
+      case Msa_MI10: {
+            /* Print an MSA vector load/store.  s10 is a 10-bit two's
+               complement offset counted in elements.  Sign-extend it
+               explicitly: the constructor takes s10 as UInt, and
+               shifting an unsigned value left and back right never
+               sets the upper bits, so negative offsets would be shown
+               as large positive numbers. */
+            Int imm = (Int)(i->Min.MsaMi10.s10 & 0x3FF);
+
+            if (imm & 0x200)
+               imm -= 0x400;
+
+            /* Scale the element offset to bytes for display.  Multiply
+               rather than shift, since imm may be negative here. */
+            switch (i->Min.MsaMi10.df) {
+               case MSA_B:
+                  break;
+
+               case MSA_H:
+                  imm *= 2;
+                  break;
+
+               case MSA_W:
+                  imm *= 4;
+                  break;
+
+               case MSA_D:
+                  imm *= 8;
+                  break;
+            }
+
+            vex_printf("%s.%c ", showMsaMI10op(i->Min.MsaMi10.op),
+                       showMsaDF(i->Min.MsaMi10.df));
+            ppHRegMIPS(i->Min.MsaMi10.wd, mode64);
+            vex_printf(", (%d)", imm);
+            ppHRegMIPS(i->Min.MsaMi10.rs, mode64);
+            return;
+         }
+
+      case Msa_ELM:
+         switch (i->Min.MsaElm.op) {
+            case MSA_MOVE:
+               vex_printf("move.v ");
+               ppHRegMIPS(i->Min.MsaElm.wd, mode64);
+               vex_printf(", ");
+               ppHRegMIPS(i->Min.MsaElm.ws, mode64);
+               break;
+
+            case MSA_SLDI: {
+               MSADF df;
+               UInt n;
+               MSAdfn(i->Min.MsaElm.dfn, &df, &n);
+               vex_printf("%s.%c ", showMsaElmOp(i->Min.MsaElm.op),
+                          showMsaDF(df));
+               ppHRegMIPS(i->Min.MsaElm.wd, mode64);
+               vex_printf(", ");
+               ppHRegMIPS(i->Min.MsaElm.ws, mode64);
+               vex_printf("[%u]", n);
+               break;
+            }
+
+            case MSA_INSVE: {
+               MSADF df;
+               UInt n;
+               MSAdfn(i->Min.MsaElm.dfn, &df, &n);
+               vex_printf("%s.%c ", showMsaElmOp(i->Min.MsaElm.op),
+                          showMsaDF(df));
+               ppHRegMIPS(i->Min.MsaElm.wd, mode64);
+               vex_printf("[%u], ", n);
+               ppHRegMIPS(i->Min.MsaElm.ws, mode64);
+               vex_printf("[0]");
+               break;
+            }
+
+            case MSA_COPY_S:
+            case MSA_COPY_U: {
+                  MSADF df;
+                  UInt n;
+                  MSAdfn(i->Min.MsaElm.dfn, &df, &n);
+                  vex_printf("%s.%c ", showMsaElmOp(i->Min.MsaElm.op),
+                             showMsaDF(df));
+                  ppHRegMIPS(i->Min.MsaElm.wd, mode64);
+                  vex_printf(", ");
+                  ppHRegMIPS(i->Min.MsaElm.ws, mode64);
+                  vex_printf("[%u]", n);
+                  break;
+               }
+
+            case MSA_INSERT: {
+                  MSADF df;
+                  UInt n;
+                  MSAdfn(i->Min.MsaElm.dfn, &df, &n);
+                  vex_printf("%s.%c ", showMsaElmOp(i->Min.MsaElm.op),
+                             showMsaDF(df));
+                  ppHRegMIPS(i->Min.MsaElm.wd, mode64);
+                  vex_printf("[%u], ", n);
+                  ppHRegMIPS(i->Min.MsaElm.ws, mode64);
+                  break;
+               }
+
+            case MSA_CFCMSA:
+               vex_printf("cfcmsa ");
+               ppHRegMIPS(i->Min.MsaElm.wd, mode64);
+               vex_printf(", $1");
+               break;
+
+            case MSA_CTCMSA:
+               vex_printf("ctcmsa $1, ");
+               ppHRegMIPS(i->Min.MsaElm.ws, mode64);
+               break;
+         }
+
+         return;
+
+      case Msa_3R:
+         vex_printf("%s.%c ",
+                    showMsa3ROp(i->Min.Msa3R.op), showMsaDF(i->Min.Msa3R.df));
+         ppHRegMIPS(i->Min.Msa3R.wd, mode64);
+         vex_printf(", ");
+         ppHRegMIPS(i->Min.Msa3R.ws, mode64);
+         vex_printf(", ");
+         ppHRegMIPS(i->Min.Msa3R.wt, mode64);
+         return;
+
+      case Msa_2R:
+         vex_printf("%s.%c ",
+                    showMsa2ROp(i->Min.Msa2R.op), showMsaDF(i->Min.Msa2R.df));
+         ppHRegMIPS(i->Min.Msa2R.wd, mode64);
+         vex_printf(", ");
+         ppHRegMIPS(i->Min.Msa2R.ws, mode64);
+         return;
+
+      case Msa_VEC:
+         vex_printf("%s ", showMsaVecOp(i->Min.MsaVec.op));
+         ppHRegMIPS(i->Min.MsaVec.wd, mode64);
+         vex_printf(", ");
+         ppHRegMIPS(i->Min.MsaVec.ws, mode64);
+         vex_printf(", ");
+         ppHRegMIPS(i->Min.MsaVec.wt, mode64);
+         return;
+
+      case Msa_BIT:
+         vex_printf("%s.%c ", showMsaBitOp(i->Min.MsaBit.op),
+                    showMsaDF(i->Min.MsaBit.df));
+         ppHRegMIPS(i->Min.MsaBit.wd, mode64);
+         vex_printf(", ");
+         ppHRegMIPS(i->Min.MsaBit.ws, mode64);
+         vex_printf(", %d ", i->Min.MsaBit.ms);
+         return;
+
+      case Msa_3RF:
+         vex_printf("%s.%c ", showMsa3RFOp(i->Min.Msa3RF.op),
+                    showMsaDFF(i->Min.Msa3RF.df, i->Min.Msa3RF.op));
+         ppHRegMIPS(i->Min.Msa3RF.wd, mode64);
+         vex_printf(", ");
+         ppHRegMIPS(i->Min.Msa3RF.ws, mode64);
+         vex_printf(", ");
+         ppHRegMIPS(i->Min.Msa3RF.wt, mode64);
+         return;
+
+      case Msa_2RF:
+         vex_printf("%s.%c ", showMsa2RFOp(i->Min.Msa2RF.op),
+                    showMsaDFF(i->Min.Msa2RF.df, i->Min.Msa2RF.op));
+         ppHRegMIPS(i->Min.Msa2RF.wd, mode64);
+         vex_printf(", ");
+         ppHRegMIPS(i->Min.Msa2RF.ws, mode64);
+         return;
       default:
          vpanic("ppMIPSInstr");
          break;
@@ -1659,6 +2416,84 @@ void getRegUsage_MIPSInstr(HRegUsage * u, const MIPSInstr * i, Bool mode64)
          addHRegUse(u, HRmRead, hregMIPS_LO(mode64));
          addHRegUse(u, HRmWrite, i->Min.MfHL.dst);
          return;
+      case Msa_MI10:
+         addHRegUse(u, HRmRead,  i->Min.MsaMi10.rs);
+
+         switch (i->Min.MsaMi10.op) {
+            case MSA_LD:
+               addHRegUse(u, HRmWrite,  i->Min.MsaMi10.wd);
+               break;
+
+            case MSA_ST:
+               addHRegUse(u, HRmRead,  i->Min.MsaMi10.wd);
+               break;
+         }
+
+         return;
+
+      case Msa_ELM:
+         if (LIKELY(i->Min.MsaElm.op != MSA_CFCMSA))
+            addHRegUse(u, HRmRead,  i->Min.MsaElm.ws);
+
+         switch (i->Min.MsaElm.op) {
+            case MSA_COPY_S:
+            case MSA_COPY_U:
+            case MSA_MOVE:
+            case MSA_CFCMSA:
+               addHRegUse(u, HRmWrite, i->Min.MsaElm.wd);
+               break;
+
+            case MSA_SLDI:
+            case MSA_INSERT:
+            case MSA_INSVE:
+               addHRegUse(u, HRmModify, i->Min.MsaElm.wd);
+               break;
+            case MSA_CTCMSA:
+               break;
+         }
+
+         return;
+
+      case Msa_3R:
+         addHRegUse(u, HRmRead,  i->Min.Msa3R.ws);
+         addHRegUse(u, HRmRead,  i->Min.Msa3R.wt);
+
+         if (i->Min.Msa3R.op == MSA_SLD ||
+               i->Min.Msa3R.op == MSA_VSHF) {
+            addHRegUse(u, HRmModify, i->Min.Msa3R.wd);
+         } else {
+            addHRegUse(u, HRmWrite, i->Min.Msa3R.wd);
+         }
+
+         return;
+
+      case Msa_2R:
+         addHRegUse(u, HRmWrite, i->Min.Msa2R.wd);
+         addHRegUse(u, HRmRead,  i->Min.Msa2R.ws);
+         return;
+
+      case Msa_VEC:
+         addHRegUse(u, HRmRead,  i->Min.MsaVec.ws);
+         addHRegUse(u, HRmRead,  i->Min.MsaVec.wt);
+         addHRegUse(u, HRmWrite, i->Min.MsaVec.wd);
+         return;
+
+      case Msa_BIT:
+         addHRegUse(u, HRmRead,  i->Min.MsaBit.ws);
+         addHRegUse(u, HRmWrite, i->Min.MsaBit.wd);
+         return;
+
+      case Msa_3RF:
+         addHRegUse(u, HRmRead,  i->Min.Msa3RF.ws);
+         addHRegUse(u, HRmRead,  i->Min.Msa3RF.wt);
+         addHRegUse(u, HRmWrite, i->Min.Msa3RF.wd);
+         return;
+
+      case Msa_2RF:
+         addHRegUse(u, HRmRead,  i->Min.Msa2RF.ws);
+         addHRegUse(u, HRmWrite, i->Min.Msa2RF.wd);
+         return;
+
       case Min_MtFCSR:
          addHRegUse(u, HRmRead, i->Min.MtFCSR.src);
          return;
@@ -1890,12 +2725,55 @@ void mapRegs_MIPSInstr(HRegRemap * m, MIPSInstr * i, Bool mode64)
          mapReg(m, &i->Min.Div.srcL);
          mapReg(m, &i->Min.Div.srcR);
          return;
+      case Msa_MI10:
+         mapReg(m, &i->Min.MsaMi10.rs);
+         mapReg(m, &i->Min.MsaMi10.wd);
+         return;
+
+      case Msa_ELM:
+         mapReg(m, &i->Min.MsaElm.ws);
+         mapReg(m, &i->Min.MsaElm.wd);
+         return;
+
+      case Msa_2R:
+         mapReg(m, &i->Min.Msa2R.wd);
+         mapReg(m, &i->Min.Msa2R.ws);
+         return;
+
       case Min_Call:
          {
             if (i->Min.Call.cond != MIPScc_AL)
                mapReg(m, &i->Min.Call.src);
             return;
          }
+      case Msa_3R:
+         mapReg(m, &i->Min.Msa3R.wt);
+         mapReg(m, &i->Min.Msa3R.ws);
+         mapReg(m, &i->Min.Msa3R.wd);
+         return;
+
+      case Msa_VEC:
+         mapReg(m, &i->Min.MsaVec.wt);
+         mapReg(m, &i->Min.MsaVec.ws);
+         mapReg(m, &i->Min.MsaVec.wd);
+         return;
+
+      case Msa_BIT:
+         mapReg(m, &i->Min.MsaBit.ws);
+         mapReg(m, &i->Min.MsaBit.wd);
+         return;
+
+      case Msa_3RF:
+         mapReg(m, &i->Min.Msa3RF.wt);
+         mapReg(m, &i->Min.Msa3RF.ws);
+         mapReg(m, &i->Min.Msa3RF.wd);
+         return;
+
+      case Msa_2RF:
+         mapReg(m, &i->Min.Msa2RF.ws);
+         mapReg(m, &i->Min.Msa2RF.wd);
+         return;
+
       case Min_XDirect:
          mapRegs_MIPSAMode(m, i->Min.XDirect.amPC);
          return;
@@ -2026,6 +2904,10 @@ void genSpill_MIPS( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2, HReg rreg,
       case HRcFlt64:
          *i1 = MIPSInstr_FpLdSt(False /*Store */ , 8, rreg, am);
          break;
+      case HRcVec128:
+         *i1 = MIPSInstr_MsaMi10(MSA_ST, (offsetB>>3),
+                                 GuestStatePointer(mode64), rreg, MSA_D);
+         break;
       default:
          ppHRegClass(hregClass(rreg));
          vpanic("genSpill_MIPS: unimplemented regclass");
@@ -2058,6 +2940,10 @@ void genReload_MIPS( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2, HReg rreg,
       case HRcFlt64:
          *i1 = MIPSInstr_FpLdSt(True /*Load */ , 8, rreg, am);
          break;
+      case HRcVec128:
+         *i1 = MIPSInstr_MsaMi10(MSA_LD, (offsetB>>3),
+                                 GuestStatePointer(mode64), rreg, MSA_D);
+         break;
       default:
          ppHRegClass(hregClass(rreg));
          vpanic("genReload_MIPS: unimplemented regclass");
@@ -2107,6 +2993,15 @@ inline static UInt dregNo(HReg r)
    return n;
 }
 
+inline static UInt qregEnc ( HReg r )
+{
+   /* Hardware encoding of a real register.  Asserts that r is not
+      virtual and that the encoding does not exceed 31. */
+   vassert(!hregIsVirtual(r));
+   const UInt enc = hregEncoding(r);
+   vassert(enc <= 31);
+   return enc;
+}
+
 /* Emit 32bit instruction */
 static UChar *emit32(UChar * p, UInt w32)
 {
@@ -2211,6 +3106,108 @@ static UChar *mkFormS(UChar * p, UInt opc1, UInt rRD, UInt rRS, UInt rRT,
    return emit32(p, theInstr);
 }
 
+static UChar *mkFormMI10(UChar * p, UInt msa, UInt s10, UInt rRS, UInt rWD,
+                         UInt opc, UInt rDF) {
+   /* Pack an MI10-format word and emit it at p.  Every field is
+      range-checked first so that no field can spill into a neighbour. */
+   vassert(rDF < 0x04);
+   vassert(opc < 0x10);
+   vassert(rWD < 0x20);
+   vassert(rRS < 0x20);
+   vassert(s10 < 0x400);
+   vassert(msa < 0x40);
+   return emit32(p, (msa << 26)
+                    | (s10 << 16)
+                    | (rRS << 11)
+                    | (rWD << 6)
+                    | (opc << 2)
+                    | rDF);
+}
+
+static UChar *mkFormELM(UChar *p, UInt msa, UInt op, UInt df, UInt ws, UInt wd,
+                        UInt opc) {
+   /* Pack an ELM-format word and emit it at p.  Only msa, ws, wd and
+      opc are range-checked; op and df are packed as passed. */
+   vassert(msa < 0x40);
+   vassert(ws  < 0x20);
+   vassert(wd  < 0x20);
+   vassert(opc < 0x40);
+   return emit32(p, (msa << 26)
+                    | (op << 22)
+                    | (df << 16)
+                    | (ws << 11)
+                    | (wd << 6)
+                    | opc);
+}
+
+static UChar *mkForm2R(UChar *p, UInt msa, UInt op, UInt df, UInt ws, UInt wd,
+                       UInt opc) {
+   /* Pack a 2R-format word and emit it at p.  No field is range-checked
+      here; callers must pass values that fit their fields. */
+   return emit32(p, (msa << 26)
+                    | (op << 18)
+                    | (df << 16)
+                    | (ws << 11)
+                    | (wd << 6)
+                    | opc);
+}
+
+static UChar *mkForm3R(UChar *p, UInt op, UInt df, UInt wd, UInt ws, UInt wt) {
+   /* Pack a 3R-format word and emit it at p.  op is OR-ed in unshifted,
+      so it must already hold its bits in their final positions. */
+   vassert(op  < 0x3800040);
+   vassert(df  < 0x40);
+   vassert(wt  < 0x20);
+   vassert(ws  < 0x20);
+   vassert(wd  < 0x20);
+   return emit32(p, OPC_MSA
+                    | op
+                    | (df << 21)
+                    | (wt << 16)
+                    | (ws << 11)
+                    | (wd << 6));
+}
+
+static UChar *mkFormVEC(UChar *p, UInt op, UInt ws, UInt wt, UInt wd) {
+   /* Pack a VEC-format word and emit it at p.  Note the operand order
+      of this helper: ws, then wt, then wd. */
+   vassert(op  < 0x20);
+   vassert(wt  < 0x20);
+   vassert(ws  < 0x20);
+   vassert(wd  < 0x20);
+   return emit32(p, OPC_MSA
+                    | (op << 21)
+                    | (wt << 16)
+                    | (ws << 11)
+                    | (wd << 6)
+                    | 0x1E);
+}
+
+static UChar *mkFormBIT(UChar *p, UInt op, UInt df, UInt ms, UInt ws, UInt wd) {
+   /* Pack a BIT-format word and emit it at p.  The data format is not
+      stored as-is: it selects a prefix that is OR-ed with the immediate
+      ms to form the combined df/m field. */
+   UInt dfm;
+   vassert(op  < 0x3800040);
+   vassert(df  < 0x40);
+   vassert(ms  < 0x100);
+   vassert(ws  < 0x20);
+   vassert(wd  < 0x20);
+
+   /* Prefix per format; any df other than 0, 1 or 2 gets no prefix. */
+   if (df == 0)
+      dfm = 0x70;
+   else if (df == 1)
+      dfm = 0x60;
+   else if (df == 2)
+      dfm = 0x40;
+   else
+      dfm = 0;
+
+   dfm |= ms;
+   return emit32(p, OPC_MSA
+                    | op
+                    | (dfm << 16)
+                    | (ws << 11)
+                    | (wd << 6));
+}
+
+static UChar *mkForm3RF(UChar *p, UInt op, UInt df, UInt wd, UInt ws, UInt wt) {
+   /* Pack a 3RF-format word and emit it at p.  op is OR-ed in unshifted,
+      so it must already hold its bits in their final positions. */
+   vassert(op  < 0x3C0001D);
+   vassert(df  < 0x40);
+   vassert(wt  < 0x20);
+   vassert(ws  < 0x20);
+   vassert(wd  < 0x20);
+   return emit32(p, OPC_MSA
+                    | op
+                    | (df << 21)
+                    | (wt << 16)
+                    | (ws << 11)
+                    | (wd << 6));
+}
+
+static UChar *mkForm2RF(UChar *p, UInt op, UInt df, UInt ws, UInt wd,
+                        UInt opc) {
+   /* Pack a 2RF-format word and emit it at p.  No field is range-checked
+      here; callers must pass values that fit their fields. */
+   return emit32(p, OPC_MSA
+                    | (op << 17)
+                    | (df << 16)
+                    | (ws << 11)
+                    | (wd << 6)
+                    | opc);
+}
+
 static UChar *doAMode_IR(UChar * p, UInt opc1, UInt rSD, MIPSAMode * am,
                          Bool mode64)
 {
@@ -2563,6 +3560,7 @@ Int emit_MIPSInstr ( /*MB_MOD*/Bool* is_profInc,
    UChar *ptmp = p;
    vassert(nbuf >= 32);
 
+
    switch (i->tag) {
       case Min_LI:
          p = mkLoadImm(p, iregNo(i->Min.LI.dst, mode64), i->Min.LI.imm, mode64);
@@ -2676,6 +3674,131 @@ Int emit_MIPSInstr ( /*MB_MOD*/Bool* is_profInc,
          goto done;
       }
 
+      case Msa_MI10: {
+            UInt v_reg = qregEnc(i->Min.MsaMi10.wd);
+            UInt r_reg = iregNo(i->Min.MsaMi10.rs, mode64);
+            p = mkFormMI10(p, 0x1E, i->Min.MsaMi10.s10, r_reg, v_reg, i->Min.MsaMi10.op,
+                           i->Min.MsaMi10.df);
+            goto done;
+         }
+
+      case Msa_ELM: {
+            UInt v_src, v_dst;
+
+            switch (i->Min.MsaElm.op) {
+               case MSA_INSERT:
+                  v_src = iregNo(i->Min.MsaElm.ws, mode64);
+                  v_dst = qregEnc(i->Min.MsaElm.wd);
+                  break;
+
+               case MSA_COPY_S:
+               case MSA_COPY_U:
+                  v_src = qregEnc(i->Min.MsaElm.ws);
+                  v_dst = iregNo(i->Min.MsaElm.wd, mode64);
+                  break;
+
+               case MSA_CTCMSA:
+                  v_src = iregNo(i->Min.MsaElm.ws, mode64);
+                  v_dst = 1;
+                  break;
+
+               case MSA_CFCMSA:
+                  v_src = 1;
+                  v_dst = iregNo(i->Min.MsaElm.wd, mode64);
+                  break;
+
+               default:
+                  v_src = qregEnc(i->Min.MsaElm.ws);
+                  v_dst = qregEnc(i->Min.MsaElm.wd);
+                  break;
+            }
+
+            switch (i->Min.MsaElm.op) {
+               case MSA_MOVE:
+               case MSA_CTCMSA:
+               case MSA_CFCMSA:
+                  p = mkFormELM(p, 0x1E, 0, i->Min.MsaElm.op, v_src, v_dst, 25);
+                  break;
+
+               default:
+                  p = mkFormELM(p, 0x1E, i->Min.MsaElm.op, i->Min.MsaElm.dfn, v_src, v_dst, 25);
+                  break;
+            }
+
+            goto done;
+         }
+
+      case Msa_3R: {
+            UInt v_wt;
+
+            switch (i->Min.Msa3R.op) {
+               case MSA_SLD:
+               case MSA_SPLAT:
+                  v_wt = iregNo(i->Min.Msa3R.wt, mode64);
+                  break;
+
+               default:
+                  v_wt = qregEnc(i->Min.Msa3R.wt);
+                  break;
+            }
+
+            UInt v_ws = qregEnc(i->Min.Msa3R.ws);
+            UInt v_wd = qregEnc(i->Min.Msa3R.wd);;
+            p = mkForm3R(p, i->Min.Msa3R.op, i->Min.Msa3R.df, v_wd, v_ws, v_wt);
+            goto done;
+         }
+
+      case Msa_2R: {
+            UInt v_src;
+            UInt v_dst;
+
+            switch (i->Min.Msa2R.op) {
+               case MSA_FILL:
+                  v_src = iregNo(i->Min.Msa2R.ws, mode64);
+                  v_dst = qregEnc(i->Min.Msa2R.wd);
+                  break;
+
+               default:
+                  v_src = qregEnc(i->Min.Msa2R.ws);
+                  v_dst = qregEnc(i->Min.Msa2R.wd);
+                  break;
+            }
+
+            p = mkForm2R(p, 0x1E, i->Min.Msa2R.op, i->Min.Msa2R.df, v_src, v_dst, 0x1E);
+            goto done;
+         }
+
+      case Msa_2RF: {
+            UInt v_src = qregEnc(i->Min.Msa2RF.ws);
+            UInt v_dst = qregEnc(i->Min.Msa2RF.wd);
+            p = mkForm2RF(p, i->Min.Msa2RF.op, i->Min.Msa2RF.df, v_src, v_dst, 0x1E);
+            goto done;
+         }
+
+      case Msa_VEC: {
+            /* mkFormVEC takes its registers in (ws, wt, wd) order.  The
+               call used to pass (wt, ws, wd), which put each source in
+               the other's field: harmless for and/or/xor/nor, which
+               are commutative, but the emitted word did not match what
+               ppMIPSInstr prints for the same instruction. */
+            UInt v_ws = qregEnc(i->Min.MsaVec.ws);
+            UInt v_wt = qregEnc(i->Min.MsaVec.wt);
+            UInt v_wd = qregEnc(i->Min.MsaVec.wd);
+            p = mkFormVEC(p, i->Min.MsaVec.op, v_ws, v_wt, v_wd);
+            goto done;
+         }
+
+      case Msa_BIT: {
+            /* Take the data format from this instruction's own MsaBit
+               variant.  The previous code read i->Min.Msa3R.df, which
+               only gives the right value if the two variants happen to
+               place df at the same offset. */
+            UInt v_ws = qregEnc(i->Min.MsaBit.ws);
+            UInt v_wd = qregEnc(i->Min.MsaBit.wd);
+            p = mkFormBIT(p, i->Min.MsaBit.op, i->Min.MsaBit.df,
+                          i->Min.MsaBit.ms, v_ws, v_wd);
+            goto done;
+         }
+
+      case Msa_3RF: {
+            UInt v_wt = qregEnc(i->Min.Msa3RF.wt);
+            UInt v_ws = qregEnc(i->Min.Msa3RF.ws);
+            UInt v_wd = qregEnc(i->Min.Msa3RF.wd);;
+            p = mkForm3RF(p, i->Min.Msa3RF.op, i->Min.Msa3RF.df, v_wd, v_ws, v_wt);
+            goto done;
+         }
+
       case Min_Shft: {
          MIPSRH *srcR = i->Min.Shft.srcR;
          Bool sz32 = i->Min.Shft.sz32;
index fb681ac4de2c1cde64f0c5e403c19475a3f9dda4..c49def072c2e0a53f032c4c7c1e6ad32a766eb1d 100644 (file)
   mkHReg(False,  HRcFlt64, \
          (_enc), (_mode64) ? (_ix64) : (_ix32))
 
+/* Make an HRcVec128 HReg with encoding _enc.  The last mkHReg argument
+   is _ix64 when _mode64 is true and _ix32 otherwise.
+   NOTE(review): the leading False presumably means "not virtual" -
+   confirm against mkHReg. */
+#define VEC(_mode64, _enc, _ix64, _ix32)      \
+  mkHReg(False, HRcVec128, (_enc),            \
+         (_mode64) ? (_ix64) : (_ix32))
+
 ST_IN HReg hregMIPS_GPR16 ( Bool mode64 ) { return GPR(mode64, 16,  0,  0); }
 ST_IN HReg hregMIPS_GPR17 ( Bool mode64 ) { return GPR(mode64, 17,  1,  1); }
 ST_IN HReg hregMIPS_GPR18 ( Bool mode64 ) { return GPR(mode64, 18,  2,  2); }
@@ -75,57 +79,79 @@ ST_IN HReg hregMIPS_F26   ( Bool mode64 ) { return FR (mode64, 26, 17, 17); }
 ST_IN HReg hregMIPS_F28   ( Bool mode64 ) { return FR (mode64, 28, 18, 18); }
 ST_IN HReg hregMIPS_F30   ( Bool mode64 ) { return FR (mode64, 30, 19, 19); }
 
+ST_IN HReg hregMIPS_W16    ( Bool mode64 ) { return VEC(mode64, 1, 20, 20); }
+ST_IN HReg hregMIPS_W17    ( Bool mode64 ) { return VEC(mode64, 3, 21, 21); }
+ST_IN HReg hregMIPS_W18    ( Bool mode64 ) { return VEC(mode64, 5, 22, 22); }
+ST_IN HReg hregMIPS_W19    ( Bool mode64 ) { return VEC(mode64, 7, 23, 23); }
+ST_IN HReg hregMIPS_W20    ( Bool mode64 ) { return VEC(mode64, 9, 24, 24); }
+ST_IN HReg hregMIPS_W21    ( Bool mode64 ) { return VEC(mode64, 11, 25, 25); }
+ST_IN HReg hregMIPS_W22    ( Bool mode64 ) { return VEC(mode64, 13, 26, 26); }
+ST_IN HReg hregMIPS_W23    ( Bool mode64 ) { return VEC(mode64, 15, 27, 27); }
+ST_IN HReg hregMIPS_W24    ( Bool mode64 ) { return VEC(mode64, 17, 28, 28); }
+ST_IN HReg hregMIPS_W25    ( Bool mode64 ) { return VEC(mode64, 19, 29, 29); }
+ST_IN HReg hregMIPS_W26    ( Bool mode64 ) { return VEC(mode64, 21, 30, 30); }
+ST_IN HReg hregMIPS_W27    ( Bool mode64 ) { return VEC(mode64, 23, 31, 31); }
+ST_IN HReg hregMIPS_W28    ( Bool mode64 ) { return VEC(mode64, 25, 32, 32); }
+ST_IN HReg hregMIPS_W29    ( Bool mode64 ) { return VEC(mode64, 27, 33, 33); }
+ST_IN HReg hregMIPS_W30    ( Bool mode64 ) { return VEC(mode64, 29, 34, 34); }
+ST_IN HReg hregMIPS_W31    ( Bool mode64 ) { return VEC(mode64, 31, 35, 35); }
+
 // DRs are only allocatable in 32-bit mode, so the 64-bit index numbering
 // doesn't advance here.
 ST_IN HReg hregMIPS_D0    ( Bool mode64 ) { vassert(!mode64);
-                                            return DR (mode64,  0,  0, 20); }
+                                            return DR (mode64,  0,  0, 36); }
 ST_IN HReg hregMIPS_D1    ( Bool mode64 ) { vassert(!mode64);
-                                            return DR (mode64,  2,  0, 21); }
+                                            return DR (mode64,  2,  0, 37); }
 ST_IN HReg hregMIPS_D2    ( Bool mode64 ) { vassert(!mode64);
-                                            return DR (mode64,  4,  0, 22); }
+                                            return DR (mode64,  4,  0, 38); }
 ST_IN HReg hregMIPS_D3    ( Bool mode64 ) { vassert(!mode64);
-                                            return DR (mode64,  6,  0, 23); }
+                                            return DR (mode64,  6,  0, 39); }
 ST_IN HReg hregMIPS_D4    ( Bool mode64 ) { vassert(!mode64);
-                                            return DR (mode64,  8,  0, 24); }
+                                            return DR (mode64,  8,  0, 40); }
 ST_IN HReg hregMIPS_D5    ( Bool mode64 ) { vassert(!mode64);
-                                            return DR (mode64, 10,  0, 25); }
+                                            return DR (mode64, 10,  0, 41); }
 ST_IN HReg hregMIPS_D6    ( Bool mode64 ) { vassert(!mode64);
-                                            return DR (mode64, 12,  0, 26); }
+                                            return DR (mode64, 12,  0, 42); }
 ST_IN HReg hregMIPS_D7    ( Bool mode64 ) { vassert(!mode64);
-                                            return DR (mode64, 14,  0, 27); }
-
-ST_IN HReg hregMIPS_HI    ( Bool mode64 ) { return FR (mode64, 33, 20, 28); }
-ST_IN HReg hregMIPS_LO    ( Bool mode64 ) { return FR (mode64, 34, 21, 29); }
-
-ST_IN HReg hregMIPS_GPR0  ( Bool mode64 ) { return GPR(mode64,  0, 22, 30); }
-ST_IN HReg hregMIPS_GPR1  ( Bool mode64 ) { return GPR(mode64,  1, 23, 31); }
-ST_IN HReg hregMIPS_GPR2  ( Bool mode64 ) { return GPR(mode64,  2, 24, 32); }
-ST_IN HReg hregMIPS_GPR3  ( Bool mode64 ) { return GPR(mode64,  3, 25, 33); }
-ST_IN HReg hregMIPS_GPR4  ( Bool mode64 ) { return GPR(mode64,  4, 26, 34); }
-ST_IN HReg hregMIPS_GPR5  ( Bool mode64 ) { return GPR(mode64,  5, 27, 35); }
-ST_IN HReg hregMIPS_GPR6  ( Bool mode64 ) { return GPR(mode64,  6, 28, 36); }
-ST_IN HReg hregMIPS_GPR7  ( Bool mode64 ) { return GPR(mode64,  7, 29, 37); }
-ST_IN HReg hregMIPS_GPR8  ( Bool mode64 ) { return GPR(mode64,  8, 30, 38); }
-ST_IN HReg hregMIPS_GPR9  ( Bool mode64 ) { return GPR(mode64,  9, 31, 39); }
-ST_IN HReg hregMIPS_GPR10 ( Bool mode64 ) { return GPR(mode64, 10, 32, 40); }
-ST_IN HReg hregMIPS_GPR11 ( Bool mode64 ) { return GPR(mode64, 11, 33, 41); }
-ST_IN HReg hregMIPS_GPR23 ( Bool mode64 ) { return GPR(mode64, 23, 34, 42); }
-ST_IN HReg hregMIPS_GPR25 ( Bool mode64 ) { return GPR(mode64, 25, 35, 43); }
-ST_IN HReg hregMIPS_GPR29 ( Bool mode64 ) { return GPR(mode64, 29, 36, 44); }
-ST_IN HReg hregMIPS_GPR31 ( Bool mode64 ) { return GPR(mode64, 31, 37, 45); }
+                                            return DR (mode64, 14,  0, 43); }
+
+ST_IN HReg hregMIPS_HI    ( Bool mode64 ) { return FR (mode64, 33, 36, 44); }
+ST_IN HReg hregMIPS_LO    ( Bool mode64 ) { return FR (mode64, 34, 37, 45); }
+
+ST_IN HReg hregMIPS_GPR0  ( Bool mode64 ) { return GPR(mode64,  0, 38, 46); }
+ST_IN HReg hregMIPS_GPR1  ( Bool mode64 ) { return GPR(mode64,  1, 39, 47); }
+ST_IN HReg hregMIPS_GPR2  ( Bool mode64 ) { return GPR(mode64,  2, 40, 48); }
+ST_IN HReg hregMIPS_GPR3  ( Bool mode64 ) { return GPR(mode64,  3, 41, 49); }
+ST_IN HReg hregMIPS_GPR4  ( Bool mode64 ) { return GPR(mode64,  4, 42, 50); }
+ST_IN HReg hregMIPS_GPR5  ( Bool mode64 ) { return GPR(mode64,  5, 43, 51); }
+ST_IN HReg hregMIPS_GPR6  ( Bool mode64 ) { return GPR(mode64,  6, 44, 52); }
+ST_IN HReg hregMIPS_GPR7  ( Bool mode64 ) { return GPR(mode64,  7, 45, 53); }
+ST_IN HReg hregMIPS_GPR8  ( Bool mode64 ) { return GPR(mode64,  8, 46, 54); }
+ST_IN HReg hregMIPS_GPR9  ( Bool mode64 ) { return GPR(mode64,  9, 47, 55); }
+ST_IN HReg hregMIPS_GPR10 ( Bool mode64 ) { return GPR(mode64, 10, 48, 56); }
+ST_IN HReg hregMIPS_GPR11 ( Bool mode64 ) { return GPR(mode64, 11, 49, 57); }
+ST_IN HReg hregMIPS_GPR23 ( Bool mode64 ) { return GPR(mode64, 23, 50, 58); }
+ST_IN HReg hregMIPS_GPR25 ( Bool mode64 ) { return GPR(mode64, 25, 51, 59); }
+ST_IN HReg hregMIPS_GPR29 ( Bool mode64 ) { return GPR(mode64, 29, 52, 60); }
+ST_IN HReg hregMIPS_GPR31 ( Bool mode64 ) { return GPR(mode64, 31, 53, 61); }
 
 #undef ST_IN
 #undef GPR
 #undef FR
 #undef DR
+#undef VEC
 
 #define GuestStatePointer(_mode64)     hregMIPS_GPR23(_mode64)
 #define StackFramePointer(_mode64)     hregMIPS_GPR30(_mode64)
 #define StackPointer(_mode64)          hregMIPS_GPR29(_mode64)
+#define Zero(_mode64)                  hregMIPS_GPR0(_mode64)
 
 /* guest_COND offset */
 #define COND_OFFSET(_mode64) ((_mode64) ? 588 : 448)
 
+/* guest_MSACSR offset */
+#define MSACSR_OFFSET(_mode64) ((_mode64) ? 1144 : 1016)
+
 /* Num registers used for function calls */
 #if defined(VGP_mips32_linux)
   /* a0, a1, a2, a3 */
@@ -137,6 +163,7 @@ ST_IN HReg hregMIPS_GPR31 ( Bool mode64 ) { return GPR(mode64, 31, 37, 45); }
 
 extern UInt ppHRegMIPS ( HReg, Bool );
 
+#define OPC_MSA        0x78000000
 
 /* --------- Condition codes, Intel encoding. --------- */
 typedef enum {
@@ -270,6 +297,153 @@ typedef enum {
 extern const HChar *showMIPSMaccOp(MIPSMaccOp, Bool);
 /* --------- */
 
+typedef enum {
+   MSA_LD = 8,
+   MSA_ST = 9
+} MSAMI10Op;
+
+extern const HChar *showMsaMI10op(MSAMI10Op);
+
+typedef enum {
+   MSA_SLDI   = 0,
+   MSA_COPY_S = 2,
+   MSA_COPY_U = 3,
+   MSA_INSERT = 4,
+   MSA_INSVE  = 5,
+   MSA_MOVE   = 0xBE,
+   MSA_CFCMSA = 0x7E,
+   MSA_CTCMSA = 0x3E
+} MSAELMOp;
+
+extern const HChar *showMsaElmOp(MSAELMOp);
+
+typedef enum {
+   MSA_FILL = 0xC0,
+   MSA_PCNT = 0xC1,
+   MSA_NLOC = 0xC2,
+   MSA_NLZC = 0xC3
+} MSA2ROp;
+
+extern const HChar *showMsa2ROp(MSA2ROp);
+
+typedef enum {
+   MSA_FTRUNC_S = 0x191,
+   MSA_FTRUNC_U = 0x192,
+   MSA_FFINT_S  = 0x19E,
+   MSA_FFINT_U  = 0x19F,
+   MSA_FSQRT    = 0x193,
+   MSA_FRSQRT   = 0x194,
+   MSA_FRCP     = 0x195,
+   MSA_FLOG2    = 0x197,
+   MSA_FEXUPR   = 0x199,
+   MSA_FTINT_U  = 0x19D,
+   MSA_FTINT_S  = 0x19C,
+} MSA2RFOp;
+
+extern const HChar *showMsa2RFOp(MSA2RFOp);
+
+typedef enum {
+   MSA_SLL = 0xD,
+   MSA_ADDV,
+   MSA_CEQ,
+   MSA_ADD_A,
+   MSA_SUBS_S,
+   MSA_SLD = 0x14,
+   MSA_SRA = 0x80000D,
+   MSA_SUBV,
+   MSA_SUBS_U = 0x800011,
+   MSA_SRL = 0x100000D,
+   MSA_MAX_S,
+   MSA_CLT_S,
+   MSA_ADDS_S,
+   MSA_PCKEV = 0x1000014,
+   MSA_MAX_U = 0x180000E,
+   MSA_CLT_U,
+   MSA_ADDS_U,
+   MSA_PCKOD = 0x1800014,
+   MSA_MIN_S = 0x200000E,
+   MSA_ILVL = 0x2000014,
+   MSA_MIN_U = 0x280000E,
+   MSA_ILVR = 0x2800014,
+   MSA_AVER_S = 0x3000010,
+   MSA_ILVEV = 0x3000014,
+   MSA_AVER_U = 0x3800010,
+   MSA_ILVOD = 0x3800014,
+   MSA_MULV = 0x0000012,
+   MSA_SPLAT = 0x0800014,
+   MSA_DIVS = 0x2000012,
+   MSA_DIVU = 0x2800012,
+   MSA_VSHF = 0x0000015,
+} MSA3ROp;
+
+extern const HChar *showMsa3ROp(MSA3ROp);
+
+typedef enum {
+   MSA_FADD   = 0x000001B,
+   MSA_FCUN   = 0x040001A,
+   MSA_FSUB   = 0x040001B,
+   MSA_FCEQ   = 0x080001A,
+   MSA_FMUL   = 0x080001B,
+   MSA_FDIV   = 0x0C0001B,
+   MSA_FMADD  = 0x100001B,
+   MSA_FCLT   = 0x100001A,
+   MSA_FMSUB  = 0x140001B,
+   MSA_FEXP2  = 0x1C0001B,
+   MSA_FMIN   = 0x300001B,
+   MSA_FMIN_A = 0x340001B,
+   MSA_FMAX   = 0x380001B,
+   MSA_MUL_Q  = 0x100001C,
+   MSA_FCLE   = 0x180001A,
+   MSA_FTQ    = 0x280001B,
+   MSA_FEXDO  = 0x200001B,
+   MSA_MULR_Q = 0x300001C,
+} MSA3RFOp;
+
+extern const HChar *showMsa3RFOp(MSA3RFOp);
+
+typedef enum {
+   MSA_ANDV,
+   MSA_ORV,
+   MSA_NORV,
+   MSA_XORV
+} MSAVECOp;
+
+extern const HChar *showMsaVecOp(MSAVECOp);
+
+typedef enum {
+   MSA_SLLI = 9,
+   MSA_SAT_S,
+   MSA_SRAI = 0x800009,
+   MSA_SRLI = 0x1000009,
+   MSA_SRARI = 0x100000A
+} MSABITOp;
+
+extern const HChar *showMsaBitOp(MSABITOp);
+
+typedef enum {
+   MSA_B = 0,   /* 8-bit elements  (selected for the 8x16 IR ops)  */
+   MSA_H = 1,   /* 16-bit elements (selected for the 16x8 IR ops)  */
+   MSA_W = 2,   /* 32-bit elements (selected for the 32x4 IR ops)  */
+   MSA_D = 3,   /* 64-bit elements (selected for the 64x2 IR ops)  */
+} MSADF;
+
+extern HChar showMsaDF(MSADF df);
+
+typedef enum {
+   MSA_DFN_B    = 0x00,   /* 8-bit elements;  OR in an element index 0..15 */
+   MSA_DFN_H    = 0x20,   /* 16-bit elements; OR in an element index 0..7  */
+   MSA_DFN_W    = 0x30,   /* 32-bit elements; OR in an element index 0..3  */
+   MSA_DFN_D    = 0x38,   /* 64-bit elements; OR in an element index 0..1  */
+} MSADFNMask;
+
+typedef enum {
+   MSA_F_WH = 0,
+   MSA_F_DW = 1,
+} MSADFFlx;
+
+extern HChar showMsaDFF(MSADFFlx df, int op);
+
+
 /* ----- Instruction tags ----- */
 typedef enum {
    Min_LI,         /* load word (32/64-bit) immediate (fake insn) */
@@ -321,7 +495,16 @@ typedef enum {
    Min_FpCompare,  /* FP compare, generating value into int reg */
 
    Min_FpGpMove,   /* Move from/to fpr to/from gpr */
-   Min_MoveCond    /* Move Conditional */
+   Min_MoveCond,   /* Move Conditional */
+
+   Msa_MI10,
+   Msa_ELM,
+   Msa_3R,
+   Msa_2R,
+   Msa_VEC,
+   Msa_BIT,
+   Msa_3RF,
+   Msa_2RF,
 } MIPSInstrTag;
 
 /* --------- */
@@ -498,6 +681,10 @@ typedef struct {
          HReg dst;
          MIPSAMode *src;
       } Load;
+      struct {
+         HReg data;
+         HReg addr;
+      } MsaLoad;
       /* 64/32/16/8 bit stores */
       struct {
          UChar sz;   /* 1|2|4|8 */
@@ -621,6 +808,58 @@ typedef struct {
          HReg src;
          HReg cond;
       } MoveCond;
+      struct {
+         MSAMI10Op op;
+         UInt s10;
+         HReg rs;
+         HReg wd;
+         MSADF df;
+      } MsaMi10;
+      struct {
+         MSAELMOp op;
+         HReg ws;
+         HReg wd;
+         UInt dfn;
+      } MsaElm;
+      struct {
+         MSA2ROp op;
+         MSADF df;
+         HReg ws;
+         HReg wd;
+      } Msa2R;
+      struct {
+         MSA3ROp op;
+         MSADF df;
+         HReg wt;
+         HReg ws;
+         HReg wd;
+      } Msa3R;
+      struct {
+         MSAVECOp op;
+         HReg wt;
+         HReg ws;
+         HReg wd;
+      } MsaVec;
+      struct {
+         MSABITOp op;
+         MSADF df;
+         UChar ms;
+         HReg ws;
+         HReg wd;
+      }MsaBit;
+      struct {
+         MSA3RFOp op;
+         MSADFFlx df;
+         HReg wt;
+         HReg ws;
+         HReg wd;
+      } Msa3RF;
+      struct {
+         MSA2RFOp op;
+         MSADFFlx df;
+         HReg ws;
+         HReg wd;
+      } Msa2RF;
 
    } Min;
 } MIPSInstr;
@@ -695,6 +934,15 @@ extern MIPSInstr *MIPSInstr_EvCheck(MIPSAMode* amCounter,
                                     MIPSAMode* amFailAddr );
 extern MIPSInstr *MIPSInstr_ProfInc( void );
 
+extern MIPSInstr* MIPSInstr_MsaMi10(MSAMI10Op op, UInt s10, HReg rs, HReg wd, MSADF df);
+extern MIPSInstr* MIPSInstr_MsaElm(MSAELMOp op, HReg ws, HReg wd, UInt dfn);
+extern MIPSInstr* MIPSInstr_Msa3R(MSA3ROp op, MSADF df, HReg wd, HReg ws, HReg wt);
+extern MIPSInstr* MIPSInstr_Msa2R(MSA2ROp op, MSADF df, HReg ws, HReg wd);
+extern MIPSInstr* MIPSInstr_MsaVec(MSAVECOp op, HReg wt, HReg ws, HReg wd);
+extern MIPSInstr* MIPSInstr_MsaBit(MSABITOp op, MSADF df, UChar ms, HReg ws, HReg wd);
+extern MIPSInstr* MIPSInstr_Msa3RF(MSA3RFOp op, MSADFFlx df, HReg wd, HReg ws, HReg wt);
+extern MIPSInstr* MIPSInstr_Msa2RF(MSA2RFOp op, MSADFFlx df, HReg wd, HReg ws);
+
 extern void ppMIPSInstr(const MIPSInstr *, Bool mode64);
 
 /* Some functions that insulate the register allocator from details
@@ -754,6 +1002,7 @@ extern VexInvalRange patchProfInc_MIPS ( VexEndness endness_host,
                                          const ULong* location_of_counter,
                                          Bool  mode64 );
 
+
 #endif /* ndef __VEX_HOST_MIPS_DEFS_H */
 
 /*---------------------------------------------------------------*/
index 3d51919fd626710d275a8f0b8fd4507ddb5db571..9e9bcb59bab1a18b5dca1020625a0295d6341e96 100644 (file)
@@ -58,6 +58,9 @@ static Bool fp_mode64 = False;
 /* Host hwcaps */
 static UInt hwcaps_host = 0;
 
+/* Host CPU has MSA ASE */
+static Bool has_msa = False;
+
 /* GPR register class for mips32/64 */
 #define HRcGPR(_mode64) ((_mode64) ? HRcInt64 : HRcInt32)
 
@@ -187,6 +190,13 @@ static HReg newVRegF(ISelEnv * env)
    return reg;
 }
 
+static HReg newVRegV ( ISelEnv* env )
+{
+   /* Make a new virtual register of class HRcVec128, indexed by the current
+      value of env->vreg_ctr, and then advance that counter by one. */
+   HReg vec_vreg = mkHReg(True/*virtual reg*/, HRcVec128, 0, env->vreg_ctr);
+   env->vreg_ctr += 1;
+   return vec_vreg;
+}
+
 static void add_to_sp(ISelEnv * env, UInt n)
 {
    HReg sp = StackPointer(mode64);
@@ -241,6 +251,11 @@ static MIPSRH *iselWordExpr_RH5u(ISelEnv * env, IRExpr * e);
 static MIPSRH *iselWordExpr_RH6u_wrk(ISelEnv * env, IRExpr * e);
 static MIPSRH *iselWordExpr_RH6u(ISelEnv * env, IRExpr * e);
 
+/* Compute an I8 into a reg-or-7-bit-unsigned-immediate, the latter being an
+   immediate in the range 1 .. 127 inclusive.  Used for doing shift amounts. */
+static MIPSRH *iselWordExpr_RH7u_wrk(ISelEnv * env, IRExpr * e);
+static MIPSRH *iselWordExpr_RH7u(ISelEnv * env, IRExpr * e);
+
 /* compute an I8/I16/I32 into a GPR*/
 static HReg iselWordExpr_R_wrk(ISelEnv * env, IRExpr * e);
 static HReg iselWordExpr_R(ISelEnv * env, IRExpr * e);
@@ -259,6 +274,9 @@ static void iselInt128Expr_wrk(HReg * rHi, HReg * rLo,
                                ISelEnv * env, IRExpr * e);
 static void iselInt128Expr(HReg * rHi, HReg * rLo, ISelEnv * env, IRExpr * e);
 
+static HReg iselV128Expr( ISelEnv* env, IRExpr* e );
+static HReg iselV128Expr_wrk( ISelEnv* env, IRExpr* e );
+
 static MIPSCondCode iselCondCode_wrk(ISelEnv * env, IRExpr * e);
 static MIPSCondCode iselCondCode(ISelEnv * env, IRExpr * e);
 
@@ -300,6 +318,66 @@ static void set_MIPS_rounding_mode(ISelEnv * env, IRExpr * mode)
    addInstr(env, MIPSInstr_MtFCSR(tmp));
 }
 
+static void set_MIPS_rounding_mode_MSA(ISelEnv * env, IRExpr * mode) {
+   /*
+      Emit code that installs the rounding mode given by the IR expression
+      'mode' into MSACSR.  The value written is just the converted 2-bit
+      rounding mode (it is masked with 3 below), nothing else.
+
+      The previous MSACSR value is stored in an 8-byte stack slot and SP is
+      left lowered when this returns.  Callers in this file follow the MSA
+      operation with set_MIPS_rounding_default_MSA, which reloads that slot
+      and moves SP back up.
+
+      rounding mode | MIPS | IR
+      ------------------------
+      to nearest    | 00  | 00
+      to zero       | 01  | 11
+      to +infinity  | 10  | 10
+      to -infinity  | 11  | 01
+    */
+   /* rm_MIPS32  = XOR(rm_IR , (rm_IR << 1)) & 3 */
+   HReg irrm = iselWordExpr_R(env, mode);
+   HReg tmp = newVRegI(env);
+   HReg msacsr_old = newVRegI(env);
+   MIPSAMode *am_addr;
+   addInstr(env, MIPSInstr_Shft(Mshft_SLL, True, tmp, irrm,
+                                MIPSRH_Imm(False, 1)));
+   addInstr(env, MIPSInstr_Alu(Malu_XOR, tmp, irrm, MIPSRH_Reg(tmp)));
+   addInstr(env, MIPSInstr_Alu(Malu_AND, tmp, tmp, MIPSRH_Imm(False, 3)));
+   /* save old value of MSACSR (read into msacsr_old) */
+   addInstr(env, MIPSInstr_MsaElm(MSA_CFCMSA, hregMIPS_GPR0(mode64), msacsr_old,
+                                  MSA_DFN_W));
+   sub_from_sp(env, 8); /*  Move SP down 8 bytes */
+   am_addr = MIPSAMode_IR(0, StackPointer(mode64));
+   /* store old MSACSR to stack, as a 4-byte value at 0(SP) */
+   addInstr(env, MIPSInstr_Store(4, am_addr, msacsr_old, mode64));
+   /* set new value of MSACSR: the converted rounding mode held in tmp */
+   addInstr(env, MIPSInstr_MsaElm(MSA_CTCMSA, tmp, hregMIPS_GPR0(mode64),
+                                  MSA_DFN_W));
+}
+
+
+static void set_guest_MIPS_rounding_mode_MSA(ISelEnv * env) {
+   /*
+      Emit code that installs the guest's own MSACSR value into the host
+      MSACSR.  Unlike set_MIPS_rounding_mode_MSA, no IR-to-MIPS rounding
+      mode conversion is done here: the 4-byte value found at
+      MSACSR_OFFSET in the guest state is written to the register as it is.
+
+      The previous MSACSR value is stored in an 8-byte stack slot and SP is
+      left lowered when this returns.  Callers in this file follow the MSA
+      operation with set_MIPS_rounding_default_MSA, which reloads that slot
+      and moves SP back up.
+    */
+   /* irrm receives the raw guest MSACSR word; no XOR/shift remapping here */
+   HReg irrm =  newVRegI(env);
+   HReg msacsr_old = newVRegI(env);
+   MIPSAMode *am_addr;
+   MIPSAMode *rm_addr = MIPSAMode_IR(MSACSR_OFFSET(mode64),
+                                     GuestStatePointer(mode64));
+   addInstr(env, MIPSInstr_Load(4, irrm, rm_addr, mode64));
+   /* save old value of MSACSR (read into msacsr_old) */
+   addInstr(env, MIPSInstr_MsaElm(MSA_CFCMSA, hregMIPS_GPR0(mode64), msacsr_old,
+                                  MSA_DFN_W));
+   sub_from_sp(env, 8); /*  Move SP down 8 bytes */
+   am_addr = MIPSAMode_IR(0, StackPointer(mode64));
+   /* store old MSACSR to stack, as a 4-byte value at 0(SP) */
+   addInstr(env, MIPSInstr_Store(4, am_addr, msacsr_old, mode64));
+   /* set new value of MSACSR: the guest value loaded into irrm */
+   addInstr(env, MIPSInstr_MsaElm(MSA_CTCMSA, irrm, hregMIPS_GPR0(mode64),
+                                  MSA_DFN_W));
+}
+
+
 static void set_MIPS_rounding_default(ISelEnv * env)
 {
    HReg fcsr = newVRegI(env);
@@ -315,6 +393,18 @@ static void set_MIPS_rounding_default(ISelEnv * env)
    addInstr(env, MIPSInstr_MtFCSR(fcsr));
 }
 
+static void set_MIPS_rounding_default_MSA(ISelEnv * env) {
+   HReg msacsr = newVRegI(env);
+   /* Undo set_MIPS_rounding_mode_MSA / set_guest_MIPS_rounding_mode_MSA:
+      reload the MSACSR value they stored at 0(SP) (a 4-byte integer load
+      into a GPR), release the 8-byte stack slot, and write the value back. */
+   MIPSAMode *am_addr;
+   am_addr = MIPSAMode_IR(0, StackPointer(mode64));
+   addInstr(env, MIPSInstr_Load(4, msacsr, am_addr, mode64));
+   add_to_sp(env, 8);  /* Reset SP */
+   /* restore the saved value of MSACSR */
+   addInstr(env, MIPSInstr_MsaElm(MSA_CTCMSA, msacsr, hregMIPS_GPR0(mode64),
+                                  MSA_DFN_W));
+}
+
 /*---------------------------------------------------------*/
 /*--- ISEL: Misc helpers                                ---*/
 /*---------------------------------------------------------*/
@@ -515,7 +605,7 @@ static void doHelperCall(/*OUT*/UInt*   stackAdjustAfterCall,
          if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
             aTy = typeOfIRExpr(env->type_env, arg);
 
-         if (aTy == Ity_I32 || mode64) {
+         if (aTy == Ity_I32 || (mode64 && aTy != Ity_INVALID)) {
             argiregs |= (1 << (argreg + 4));
             addInstr(env, mk_iMOVds_RR(argregs[argreg],
                                        iselWordExpr_R(env, arg)));
@@ -556,7 +646,7 @@ static void doHelperCall(/*OUT*/UInt*   stackAdjustAfterCall,
          if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
             aTy  = typeOfIRExpr(env->type_env, arg);
 
-         if (aTy == Ity_I32 || (mode64 && arg->tag != Iex_GSPTR)) {
+         if (aTy == Ity_I32 || (mode64 && aTy != Ity_INVALID)) {
             tmpregs[argreg] = iselWordExpr_R(env, arg);
             argreg++;
          } else if (aTy == Ity_I64) {  /* Ity_I64 */
@@ -575,8 +665,8 @@ static void doHelperCall(/*OUT*/UInt*   stackAdjustAfterCall,
             argreg++;
          }
          else if (arg->tag == Iex_VECRET) {
-            // If this happens, it denotes ill-formed IR
-            vassert(0);
+            tmpregs[argreg++] = StackPointer(mode64);
+            sub_from_sp(env, 16); /*  Move SP down 16 bytes */
          }
       }
 
@@ -623,7 +713,6 @@ static void doHelperCall(/*OUT*/UInt*   stackAdjustAfterCall,
          *retloc = mk_RetLoc_simple(RLPri_Int);
          break;
       case Ity_V128:
-         vassert(0); // ATC
          *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
          *stackAdjustAfterCall = 16;
          break;
@@ -957,8 +1046,22 @@ static HReg iselWordExpr_R_wrk(ISelEnv * env, IRExpr * e)
             return r_dst;
          }
 
+          if (!mode64 && (e->Iex.Binop.op == Iop_CasCmpEQ64
+              || e->Iex.Binop.op == Iop_CmpEQ64)) {
+             HReg tmp1, tmp2, tmp3, tmp4;
+             HReg dst1 = newVRegI(env);
+             HReg dst2 = newVRegI(env);
+             iselInt64Expr(&tmp1, &tmp2, env, e->Iex.Binop.arg1);
+             iselInt64Expr(&tmp3, &tmp4, env, e->Iex.Binop.arg2);
+             addInstr(env, MIPSInstr_Cmp(False, True, dst1, tmp1, tmp3, MIPScc_EQ));
+             addInstr(env, MIPSInstr_Cmp(False, True, dst2, tmp2, tmp4, MIPScc_EQ));
+             addInstr(env, MIPSInstr_Alu(Malu_AND, dst1, dst1, MIPSRH_Reg(dst2)));
+             return dst1;
+          }
+
          /* Cmp*32*(x,y) ? */
          if (e->Iex.Binop.op == Iop_CmpEQ32
+             || e->Iex.Binop.op == Iop_CmpEQ8
              || e->Iex.Binop.op == Iop_CmpEQ16
              || e->Iex.Binop.op == Iop_CmpNE32
              || e->Iex.Binop.op == Iop_CmpNE64
@@ -990,6 +1093,7 @@ static HReg iselWordExpr_R_wrk(ISelEnv * env, IRExpr * e)
                   cc = MIPScc_EQ;
                   size32 = True;
                   break;
+               case Iop_CmpEQ8:
                case Iop_CmpEQ16:
                   cc = MIPScc_EQ;
                   size32 = True;
@@ -1100,6 +1204,38 @@ static HReg iselWordExpr_R_wrk(ISelEnv * env, IRExpr * e)
             return r_tmpR;
          }
 
+         if (e->Iex.Binop.op == Iop_MullU8 ||
+             e->Iex.Binop.op == Iop_MullS8 ||
+             e->Iex.Binop.op == Iop_MullU16 ||
+             e->Iex.Binop.op == Iop_MullS16) {
+            Bool syned = toBool((e->Iex.Binop.op == Iop_MullS8) ||
+                                (e->Iex.Binop.op == Iop_MullS16));
+            HReg r_dst = newVRegI(env);
+            HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
+            HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
+            if (syned) {
+               Int no_bits = (e->Iex.Binop.op == Iop_MullS16) ? 16 : 24;
+               addInstr(env, MIPSInstr_Shft(Mshft_SLL, True,
+                                            r_srcL, r_srcL,
+                                            MIPSRH_Imm(False, no_bits)));
+               addInstr(env, MIPSInstr_Shft(Mshft_SRA, True,
+                                            r_srcL, r_srcL,
+                                            MIPSRH_Imm(False, no_bits)));
+               addInstr(env, MIPSInstr_Shft(Mshft_SLL, True,
+                                            r_srcR, r_srcR,
+                                            MIPSRH_Imm(False, no_bits)));
+               addInstr(env, MIPSInstr_Shft(Mshft_SRA, True,
+                                            r_srcR, r_srcR,
+                                            MIPSRH_Imm(False, no_bits)));
+               addInstr(env, MIPSInstr_Mul(r_dst, r_srcL, r_srcR));
+
+            } else {
+               addInstr(env, MIPSInstr_Mult(syned, r_srcL, r_srcR));
+               addInstr(env, MIPSInstr_Mflo(r_dst));
+            }
+            return r_dst;
+         }
+
          if (e->Iex.Binop.op == Iop_CmpF64) {
             HReg r_srcL, r_srcR;
             if (mode64) {
@@ -1214,6 +1350,22 @@ static HReg iselWordExpr_R_wrk(ISelEnv * env, IRExpr * e)
             return r_dst;
          }
 
+         if (e->Iex.Binop.op == Iop_DivS32 ||
+             e->Iex.Binop.op == Iop_DivU32 ||
+             (e->Iex.Binop.op == Iop_DivS64 && mode64) ||
+             (e->Iex.Binop.op == Iop_DivU64 && mode64)) {
+            HReg r_dst = newVRegI(env);
+            Bool syned = toBool(e->Iex.Binop.op == Iop_DivS32 ||
+                                e->Iex.Binop.op == Iop_DivS64);
+            Bool div32 = toBool(e->Iex.Binop.op == Iop_DivS32 ||
+                                e->Iex.Binop.op == Iop_DivU32);
+            HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
+            HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
+            addInstr(env, MIPSInstr_Div(syned, div32, r_srcL, r_srcR));
+            addInstr(env, MIPSInstr_Mflo(r_dst));
+            return r_dst;
+         }
+
          if (e->Iex.Binop.op == Iop_8HLto16
              || e->Iex.Binop.op == Iop_16HLto32) {
             HReg tHi   = iselWordExpr_R(env, e->Iex.Binop.arg1);
@@ -1308,6 +1460,163 @@ static HReg iselWordExpr_R_wrk(ISelEnv * env, IRExpr * e)
             return r_dst;
          }
 
+         if (e->Iex.Binop.op == Iop_F32toI32U) {
+            HReg valF = iselFltExpr(env, e->Iex.Binop.arg2);
+            HReg tmpD = newVRegD(env);
+            HReg r_dst  = newVRegI(env);
+            MIPSAMode *am_addr;
+
+            /* CVTLS tmpD, valF */
+            set_MIPS_rounding_mode(env, e->Iex.Binop.arg1);
+            addInstr(env, MIPSInstr_FpConvert(Mfp_CVTLS, tmpD, valF));
+            set_MIPS_rounding_default(env);
+
+            sub_from_sp(env, 16);  /* Move SP down 16 bytes */
+            am_addr = MIPSAMode_IR(0, StackPointer(mode64));
+
+            /* store as F64 */
+            addInstr(env, MIPSInstr_FpLdSt(False /*store */ , 8, tmpD,
+                                           am_addr));
+            /* load as 2xI32 */
+#if defined (_MIPSEL)
+            addInstr(env, MIPSInstr_Load(4, r_dst, am_addr, mode64));
+#elif defined (_MIPSEB)
+            addInstr(env, MIPSInstr_Load(4, r_dst, nextMIPSAModeFloat(am_addr),
+                                         mode64));
+#endif
+
+            /* Reset SP */
+            add_to_sp(env, 16);
+
+            return r_dst;
+         }
+
+         if (e->Iex.Binop.op == Iop_F64toI64U) {
+            HReg r_src;
+            HReg tmp = newVRegV(env);
+            vassert(has_msa);
+            r_src = iselFltExpr( env, e->Iex.Binop.arg2);
+            set_MIPS_rounding_mode_MSA(env, e->Iex.Binop.arg1);
+            addInstr(env, MIPSInstr_Msa2RF(MSA_FTINT_U, MSA_F_DW, tmp, r_src));
+            HReg r_dst = newVRegI(env);
+            addInstr(env,
+                     MIPSInstr_MsaElm(MSA_COPY_S, tmp, r_dst, MSA_DFN_D | 0));
+            set_MIPS_rounding_default_MSA(env);
+            return r_dst;
+         }
+
+         if (e->Iex.Binop.op == Iop_GetElem8x16) {
+            HReg v_src = iselV128Expr(env, e->Iex.Binop.arg1);
+            HReg r_dst = newVRegI(env);
+            MIPSRH *tmp = iselWordExpr_RH(env, False, e->Iex.Binop.arg2);
+            vassert(has_msa);
+            switch (tmp->tag) {
+               case Mrh_Imm:
+                  addInstr(env,
+                           MIPSInstr_MsaElm(MSA_COPY_U, v_src, r_dst,
+                                            MSA_DFN_B |
+                                            (tmp->Mrh.Imm.imm16 & 0x0f)));
+                  break;
+
+               case Mrh_Reg: {
+                     HReg v_tmp = newVRegV(env);
+                     addInstr(env,
+                              MIPSInstr_Msa3R(MSA_SPLAT, MSA_B, v_tmp, v_src,
+                                              tmp->Mrh.Reg.reg));
+                     addInstr(env,
+                              MIPSInstr_MsaElm(MSA_COPY_U, v_tmp, r_dst,
+                                               MSA_DFN_B));
+                     break;
+                  }
+            }
+
+            return r_dst;
+         }
+
+
+         if (e->Iex.Binop.op == Iop_GetElem16x8) {
+            HReg v_src = iselV128Expr(env, e->Iex.Binop.arg1);
+            HReg r_dst = newVRegI(env);
+            MIPSRH *tmp = iselWordExpr_RH(env, False, e->Iex.Binop.arg2);
+            vassert(has_msa);
+            switch (tmp->tag) {
+               case Mrh_Imm:
+                  addInstr(env,
+                           MIPSInstr_MsaElm(MSA_COPY_U, v_src, r_dst,
+                                            MSA_DFN_H |
+                                            (tmp->Mrh.Imm.imm16 & 0x07)));
+                  break;
+
+               case Mrh_Reg: {
+                     HReg v_tmp = newVRegV(env);
+                     addInstr(env,
+                              MIPSInstr_Msa3R(MSA_SPLAT, MSA_H, v_tmp, v_src,
+                                              tmp->Mrh.Reg.reg));
+                     addInstr(env,
+                              MIPSInstr_MsaElm(MSA_COPY_U, v_tmp, r_dst,
+                                               MSA_DFN_H));
+                     break;
+                  }
+            }
+
+            return r_dst;
+         }
+
+         if (e->Iex.Binop.op == Iop_GetElem32x4) {
+            HReg v_src = iselV128Expr(env, e->Iex.Binop.arg1);
+            HReg r_dst = newVRegI(env);
+            MIPSRH *tmp = iselWordExpr_RH(env, False, e->Iex.Binop.arg2);
+            vassert(has_msa);
+            switch (tmp->tag) {
+               case Mrh_Imm:
+                  addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, v_src, r_dst,
+                                                 MSA_DFN_W |
+                                                 (tmp->Mrh.Imm.imm16 & 0x03)));
+                  break;
+
+               case Mrh_Reg: {
+                     HReg v_tmp = newVRegV(env);
+                     addInstr(env,
+                              MIPSInstr_Msa3R(MSA_SPLAT, MSA_W, v_tmp, v_src,
+                                              tmp->Mrh.Reg.reg));
+                     addInstr(env,
+                              MIPSInstr_MsaElm(MSA_COPY_S, v_tmp, r_dst,
+                                               MSA_DFN_W));
+                     break;
+                  }
+            }
+
+            return r_dst;
+         }
+         if (e->Iex.Binop.op == Iop_GetElem64x2) {
+            vassert(mode64);
+            HReg v_src = iselV128Expr(env, e->Iex.Binop.arg1);
+            HReg r_dst = newVRegI(env);
+            MIPSRH *tmp = iselWordExpr_RH(env, False, e->Iex.Binop.arg2);
+            vassert(has_msa);
+            switch (tmp->tag) {
+               case Mrh_Imm:
+                  addInstr(env,
+                           MIPSInstr_MsaElm(MSA_COPY_S, v_src, r_dst,
+                                            MSA_DFN_D |
+                                            (tmp->Mrh.Imm.imm16 & 0x01)));
+                  break;
+
+               case Mrh_Reg: {
+                     HReg v_tmp = newVRegV(env);
+                     addInstr(env,
+                              MIPSInstr_Msa3R(MSA_SPLAT, MSA_D, v_tmp, v_src,
+                                              tmp->Mrh.Reg.reg));
+                     addInstr(env,
+                              MIPSInstr_MsaElm(MSA_COPY_S, v_tmp, r_dst,
+                                               MSA_DFN_D));
+                     break;
+                  }
+            }
+
+            return r_dst;
+         }
+
          /* -------- DSP ASE -------- */
          /* All used cases involving host-side helper calls. */
          void* fn = NULL;
@@ -1502,11 +1811,15 @@ static HReg iselWordExpr_R_wrk(ISelEnv * env, IRExpr * e)
 
          case Iop_64to1:
          case Iop_64to8: {
-            vassert(mode64);
             HReg r_src, r_dst;
             UShort mask = (op_unop == Iop_64to1) ? 0x1 : 0xFF;
             r_dst = newVRegI(env);
-            r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+            if (mode64)
+               r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+            else {
+              HReg tmp;
+              iselInt64Expr(&tmp, &r_src, env, e->Iex.Unop.arg);
+            }
             addInstr(env, MIPSInstr_Alu(Malu_AND, r_dst, r_src,
                           MIPSRH_Imm(False, mask)));
             return r_dst;
@@ -1720,6 +2033,52 @@ static HReg iselWordExpr_R_wrk(ISelEnv * env, IRExpr * e)
             return rLo;  /* and abandon rLo .. poor wee thing :-) */
          }
 
+         case Iop_V128to32: {
+            HReg i_dst = newVRegI(env);
+            HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+            vassert(has_msa);
+            addInstr(env,
+                     MIPSInstr_MsaElm(MSA_COPY_S, v_src, i_dst, MSA_DFN_W));
+            return i_dst;
+         }
+
+         case Iop_V128HIto64: {
+            vassert(mode64);
+            vassert(has_msa);
+            HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+            HReg reg = newVRegI(env);
+            addInstr(env,
+                     MIPSInstr_MsaElm(MSA_COPY_S, v_src, reg, MSA_DFN_D | 1));
+            return reg;
+         }
+
+         case Iop_V128to64: {
+            vassert(mode64);
+            vassert(has_msa);
+            HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+            HReg reg = newVRegI(env);
+            addInstr(env,
+                     MIPSInstr_MsaElm(MSA_COPY_S, v_src, reg, MSA_DFN_D | 0));
+            return reg;
+         }
+
+         case Iop_F32toF16x4: {
+            vassert(mode64);
+            vassert(has_msa);
+            HReg v_arg = iselV128Expr(env, e->Iex.Unop.arg);
+            HReg v_src = newVRegV(env);
+            set_guest_MIPS_rounding_mode_MSA(env);
+            addInstr(env,
+                     MIPSInstr_Msa3RF(MSA_FEXDO, MSA_F_WH,
+                                      v_src, v_arg, v_arg));
+            set_MIPS_rounding_default_MSA(env);
+            HReg reg = newVRegI(env);
+            addInstr(env,
+                     MIPSInstr_MsaElm(MSA_COPY_S, v_src, reg, MSA_DFN_D | 0));
+            return reg;
+         }
+
+
          default:
             break;
       }
@@ -1981,7 +2340,6 @@ static MIPSRH *iselWordExpr_RH5u_wrk(ISelEnv * env, IRExpr * e)
 
 /* --------------------- RH6u --------------------- */
 
-/* Only used in 64-bit mode. */
 static MIPSRH *iselWordExpr_RH6u ( ISelEnv * env, IRExpr * e )
 {
    MIPSRH *ri;
@@ -1997,7 +2355,7 @@ static MIPSRH *iselWordExpr_RH6u ( ISelEnv * env, IRExpr * e )
       vassert(hregIsVirtual(ri->Mrh.Reg.reg));
       return ri;
    default:
-      vpanic("iselIntExpr_RH6u: unknown mips64 RI tag");
+      vpanic("iselIntExpr_RH6u: unknown RI tag");
    }
 }
 
@@ -2019,6 +2377,46 @@ static MIPSRH *iselWordExpr_RH6u_wrk ( ISelEnv * env, IRExpr * e )
    /* default case: calculate into a register and return that */
    return MIPSRH_Reg(iselWordExpr_R(env, e));
 }
+/* --------------------- RH7u --------------------- */
+
+static MIPSRH *iselWordExpr_RH7u ( ISelEnv * env, IRExpr * e )
+{  /* Checked entry point: runs the worker on e, validates the MIPSRH it built, and returns it. */
+   MIPSRH *ri;
+   ri = iselWordExpr_RH7u_wrk(env, e);   /* the worker does the actual selection */
+   /* sanity checks: an immediate must be unsigned and in 1..127; a register must be a virtual GPR */
+   switch (ri->tag) {
+   case Mrh_Imm:
+      vassert(ri->Mrh.Imm.imm16 >= 1 && ri->Mrh.Imm.imm16 <= 127);
+      vassert(!ri->Mrh.Imm.syned);
+      return ri;
+   case Mrh_Reg:
+      vassert(hregClass(ri->Mrh.Reg.reg) == HRcGPR(env->mode64));   /* GPR class for the current mode64 setting */
+      vassert(hregIsVirtual(ri->Mrh.Reg.reg));
+      return ri;
+   default:   /* any tag other than Mrh_Imm / Mrh_Reg */
+      vpanic("iselIntExpr_RH7u: unknown RI tag");   /* NOTE(review): message says iselIntExpr_, not iselWordExpr_ */
+   }
+}
+
+/* DO NOT CALL THIS DIRECTLY !  Go through iselWordExpr_RH7u, which validates the returned MIPSRH. */
+static MIPSRH *iselWordExpr_RH7u_wrk ( ISelEnv * env, IRExpr * e )
+{
+   IRType ty = typeOfIRExpr(env->type_env, e);
+   vassert(ty == Ity_I8);   /* only I8-typed expressions are accepted */
+
+   /* special case: a U8 constant in 1..127 is returned as an unsigned immediate */
+   if (e->tag == Iex_Const
+       && e->Iex.Const.con->tag == Ico_U8
+       && e->Iex.Const.con->Ico.U8 >= 1 && e->Iex.Const.con->Ico.U8 <= 127)
+   {
+      return MIPSRH_Imm(False /*unsigned */ ,
+              e->Iex.Const.con->Ico.U8);
+   }
+
+   /* default case (also taken by constants outside 1..127): calculate into a register and return that */
+   return MIPSRH_Reg(iselWordExpr_R(env, e));
+}
+
 
 /* --------------------- CONDCODE --------------------- */
 
@@ -2155,78 +2553,2123 @@ static MIPSCondCode iselCondCode_wrk(ISelEnv * env, IRExpr * e)
 }
 
 /*---------------------------------------------------------*/
-/*--- ISEL: Integer expressions (128 bit)               ---*/
+/*--- ISEL: Vector expressions (128 bit - SIMD)         ---*/
 /*---------------------------------------------------------*/
 
-/* 64-bit mode ONLY: compute a 128-bit value into a register pair,
-   which is returned as the first two parameters.  As with
-   iselWordExpr_R, these may be either real or virtual regs; in any
-   case they must not be changed by subsequent code emitted by the
-   caller.  */
-
-static void iselInt128Expr(HReg * rHi, HReg * rLo, ISelEnv * env, IRExpr * e)
-{
-   vassert(env->mode64);
-   iselInt128Expr_wrk(rHi, rLo, env, e);
-   vassert(hregClass(*rHi) == HRcGPR(env->mode64));
-   vassert(hregIsVirtual(*rHi));
-   vassert(hregClass(*rLo) == HRcGPR(env->mode64));
-   vassert(hregIsVirtual(*rLo));
+/* Compute a vector value into a vector register.          */
+static HReg iselV128Expr (ISelEnv* env, IRExpr* e) {
+   vassert(has_msa);                     /* checked before the worker is called */
+   HReg r = iselV128Expr_wrk(env, e);    /* the worker does the actual selection */
+   vassert(hregClass(r) == HRcVec128);   /* result must be of register class HRcVec128 ... */
+   vassert(hregIsVirtual(r));            /* ... and must be a virtual register */
+   return r;
 }
 
 /* DO NOT CALL THIS DIRECTLY ! */
-static void iselInt128Expr_wrk(HReg * rHi, HReg * rLo, ISelEnv * env,
-                               IRExpr * e)
-{
+static HReg iselV128Expr_wrk(ISelEnv* env, IRExpr* e) {
+   IRType ty = typeOfIRExpr(env->type_env, e);
    vassert(e);
-   vassert(typeOfIRExpr(env->type_env, e) == Ity_I128);
+   vassert(ty == Ity_V128);
 
-   /* read 128-bit IRTemp */
    if (e->tag == Iex_RdTmp) {
-      lookupIRTempPair(rHi, rLo, env, e->Iex.RdTmp.tmp);
-      return;
+      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
    }
 
-   /* --------- BINARY ops --------- */
-   if (e->tag == Iex_Binop) {
-      switch (e->Iex.Binop.op) {
-         /* 64 x 64 -> 128 multiply */
-         case Iop_MullU64:
-         case Iop_MullS64: {
-            HReg tLo = newVRegI(env);
-            HReg tHi = newVRegI(env);
-            Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
-            HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
-            HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
-            addInstr(env, MIPSInstr_Mult(syned, r_srcL, r_srcR));
-            addInstr(env, MIPSInstr_Mfhi(tHi));
-            addInstr(env, MIPSInstr_Mflo(tLo));
-            *rHi = tHi;
-            *rLo = tLo;
-            return;
-         }
+   if (e->tag == Iex_Load) {
+      vassert (e->Iex.Load.ty == Ity_V128);
+      HReg v_dst = newVRegV(env);
+      addInstr(env, MIPSInstr_MsaMi10(MSA_LD, 0, iselWordExpr_R(env,
+                                      e->Iex.Load.addr), v_dst, MSA_B));
+      return v_dst;
+   }
 
-         /* 64HLto128(e1,e2) */
-         case Iop_64HLto128:
-            *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1);
-            *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2);
-            return;
+   if (e->tag == Iex_Get) {
+      HReg v_dst = newVRegV(env);
+#if defined(_MIPSEB)
+      HReg r_addr = newVRegI(env);
+      addInstr(env, MIPSInstr_Alu(mode64 ? Malu_DADD : Malu_ADD, r_addr, GuestStatePointer(mode64),
+                                  MIPSRH_Imm(False, e->Iex.Get.offset)));
+      addInstr(env, MIPSInstr_MsaMi10(MSA_LD, 0, r_addr, v_dst, MSA_B));
+#else
+      vassert(!(e->Iex.Get.offset & 7));
+      addInstr(env, MIPSInstr_MsaMi10(MSA_LD, e->Iex.Get.offset >> 3,
+                                      GuestStatePointer(mode64), v_dst, MSA_D));
+#endif
+      return v_dst;
+   }
 
-         case Iop_DivModU64to64:
-         case Iop_DivModS64to64: {
-            HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
-            HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
-            HReg tLo = newVRegI(env);
-            HReg tHi = newVRegI(env);
-            Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to64);
+   if (e->tag == Iex_Unop) {
+      IROp op_unop = e->Iex.Unop.op;
 
-            addInstr(env, MIPSInstr_Div(syned, False, r_srcL, r_srcR));
-            addInstr(env, MIPSInstr_Mfhi(tHi));
-            addInstr(env, MIPSInstr_Mflo(tLo));
-            *rHi = tHi;
-            *rLo = tLo;
-            return;
-         }
+      switch (op_unop) {
+         case Iop_Abs64x2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_help = newVRegV(env);
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SUBV, MSA_D, v_help, v_src, v_src));
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ADD_A, MSA_D,
+                                        v_dst, v_src, v_help));
+               return v_dst;
+            }
+
+         case Iop_Abs32x4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_help = newVRegV(env);
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SUBV, MSA_W, v_help, v_src, v_src));
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ADD_A, MSA_W,
+                                        v_dst, v_src, v_help));
+               return v_dst;
+            }
+
+         case Iop_Abs16x8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_help = newVRegV(env);
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SUBV, MSA_H, v_help, v_src, v_src));
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ADD_A, MSA_H,
+                                        v_dst, v_src, v_help));
+               return v_dst;
+            }
+
+         case Iop_Abs8x16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_help = newVRegV(env);
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SUBV, MSA_B, v_help, v_src, v_src));
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ADD_A, MSA_B,
+                                        v_dst, v_src, v_help));
+               return v_dst;
+            }
+
+         case Iop_Cnt8x16: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg res = newVRegV(env);
+               addInstr(env, MIPSInstr_Msa2R(MSA_PCNT, MSA_B, v_src, res));
+               return res;
+            }
+
+         case Iop_NotV128: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               addInstr(env, MIPSInstr_MsaVec(MSA_NORV, v_dst, v_src, v_src));
+               return v_dst;
+            }
+
+         case Iop_Reverse8sIn16_x8: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_tmp = newVRegV(env);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVEV, MSA_B, v_tmp, v_src, v_src));
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVOD, MSA_B, v_src, v_tmp, v_src));
+               return v_src;
+            }
+
+         case Iop_Reverse8sIn32_x4: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_tmp = newVRegV(env);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVEV, MSA_H, v_tmp, v_src, v_src));
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVOD, MSA_H, v_src, v_tmp, v_src));
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVEV, MSA_B, v_tmp, v_src, v_src));
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVOD, MSA_B, v_src, v_tmp, v_src));
+               return v_src;
+            }
+
+         case Iop_Reverse8sIn64_x2: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_tmp = newVRegV(env);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVEV, MSA_W, v_tmp, v_src, v_src));
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVOD, MSA_W, v_src, v_tmp, v_src));
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVEV, MSA_H, v_tmp, v_src, v_src));
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVOD, MSA_H, v_src, v_tmp, v_src));
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVEV, MSA_B, v_tmp, v_src, v_src));
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVOD, MSA_B, v_src, v_tmp, v_src));
+               return v_src;
+            }
+
+         case Iop_Cls8x16: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst = newVRegV(env);
+               addInstr(env, MIPSInstr_Msa2R(MSA_NLOC, MSA_B, v_src, v_dst));
+               return v_dst;
+            }
+
+         case Iop_Cls16x8: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst = newVRegV(env);
+               addInstr(env, MIPSInstr_Msa2R(MSA_NLOC, MSA_H, v_src, v_dst));
+               return v_dst;
+            }
+
+         case Iop_Cls32x4: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst = newVRegV(env);
+               addInstr(env, MIPSInstr_Msa2R(MSA_NLOC, MSA_W, v_src, v_dst));
+               return v_dst;
+            }
+
+         case Iop_Clz8x16: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst = newVRegV(env);
+               addInstr(env, MIPSInstr_Msa2R(MSA_NLZC, MSA_B, v_src, v_dst));
+               return v_dst;
+            }
+
+         case Iop_Clz16x8: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst = newVRegV(env);
+               addInstr(env, MIPSInstr_Msa2R(MSA_NLZC, MSA_H, v_src, v_dst));
+               return v_dst;
+            }
+
+         case Iop_Clz32x4: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst = newVRegV(env);
+               addInstr(env, MIPSInstr_Msa2R(MSA_NLZC, MSA_W, v_src, v_dst));
+               return v_dst;
+            }
+
+         case Iop_Clz64x2: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst = newVRegV(env);
+               addInstr(env, MIPSInstr_Msa2R(MSA_NLZC, MSA_D, v_src, v_dst));
+               return v_dst;
+            }
+
+         case Iop_Abs32Fx4: {
+               HReg v_src  = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst  = newVRegV(env);
+               HReg v_help = newVRegV(env);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FMUL, MSA_F_WH,
+                                         v_help, v_src, v_src));
+               addInstr(env,
+                        MIPSInstr_Msa2RF(MSA_FSQRT, MSA_F_WH, v_dst, v_help));
+               return v_dst;
+            }
+
+         case Iop_Abs64Fx2: {
+               HReg v_src  = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst  = newVRegV(env);
+               HReg v_help = newVRegV(env);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FMUL, MSA_F_DW,
+                                         v_help, v_src, v_src));
+               addInstr(env,
+                        MIPSInstr_Msa2RF(MSA_FSQRT, MSA_F_DW, v_dst, v_help));
+               return v_dst;
+            }
+
+         case Iop_RecipEst32Fx4: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst = newVRegV(env);
+               set_guest_MIPS_rounding_mode_MSA(env);
+               addInstr(env,
+                        MIPSInstr_Msa2RF(MSA_FRCP, MSA_F_WH, v_dst, v_src));
+               set_MIPS_rounding_default_MSA(env);
+               return v_dst;
+            }
+
+         case Iop_RecipEst64Fx2: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst = newVRegV(env);
+               set_guest_MIPS_rounding_mode_MSA(env);
+               addInstr(env,
+                        MIPSInstr_Msa2RF(MSA_FRCP, MSA_F_DW, v_dst, v_src));
+               set_MIPS_rounding_default_MSA(env);
+               return v_dst;
+            }
+
+         case Iop_RSqrtEst32Fx4: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst = newVRegV(env);
+               set_guest_MIPS_rounding_mode_MSA(env);
+               addInstr(env,
+                        MIPSInstr_Msa2RF(MSA_FRSQRT, MSA_F_WH, v_dst, v_src));
+               set_MIPS_rounding_default_MSA(env);
+               return v_dst;
+            }
+
+         case Iop_RSqrtEst64Fx2: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst = newVRegV(env);
+               set_guest_MIPS_rounding_mode_MSA(env);
+               addInstr(env,
+                        MIPSInstr_Msa2RF(MSA_FRSQRT, MSA_F_DW, v_dst, v_src));
+               set_MIPS_rounding_default_MSA(env);
+               return v_dst;
+            }
+
+         case Iop_F16toF32x4: {
+               HReg v_dst = newVRegV(env);
+
+               if (mode64) {
+                  HReg r_src;
+                  r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+                  addInstr(env,
+                           MIPSInstr_Msa2R(MSA_FILL, MSA_D, r_src, v_dst));
+                  addInstr(env,
+                           MIPSInstr_MsaElm(MSA_INSERT, r_src, v_dst,
+                                            MSA_DFN_D | 1));
+               } else {
+                  HReg r_srch, r_srcl;
+                  iselInt64Expr(&r_srch, &r_srcl, env, e->Iex.Unop.arg);
+                  addInstr(env,
+                           MIPSInstr_Msa2R(MSA_FILL, MSA_W, r_srcl, v_dst));
+                  addInstr(env,
+                           MIPSInstr_MsaElm(MSA_INSERT, r_srch, v_dst,
+                                            MSA_DFN_W | 1));
+                  addInstr(env,
+                           MIPSInstr_MsaElm(MSA_INSERT, r_srcl, v_dst,
+                                            MSA_DFN_W | 2));
+                  addInstr(env,
+                           MIPSInstr_MsaElm(MSA_INSERT, r_srch, v_dst,
+                                            MSA_DFN_W | 3));
+               }
+
+               addInstr(env,
+                        MIPSInstr_Msa2RF(MSA_FEXUPR, MSA_F_WH, v_dst, v_dst));
+               return v_dst;
+            }
+
+         case Iop_I32UtoFx4: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst = newVRegV(env);
+               set_guest_MIPS_rounding_mode_MSA(env);
+               addInstr(env,
+                        MIPSInstr_Msa2RF(MSA_FFINT_U, MSA_F_WH, v_dst, v_src));
+               set_MIPS_rounding_default_MSA(env);
+               return v_dst;
+            }
+
+         case Iop_FtoI32Sx4_RZ: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst = newVRegV(env);
+               addInstr(env,
+                        MIPSInstr_Msa2RF(MSA_FTRUNC_S, MSA_F_WH, v_dst, v_src));
+               return v_dst;
+            }
+
+         case Iop_FtoI32Ux4_RZ: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst = newVRegV(env);
+               addInstr(env,
+                        MIPSInstr_Msa2RF(MSA_FTRUNC_U, MSA_F_WH, v_dst, v_src));
+               return v_dst;
+            }
+
+         case Iop_Log2_32Fx4: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst = newVRegV(env);
+               addInstr(env,
+                        MIPSInstr_Msa2RF(MSA_FLOG2, MSA_F_WH, v_dst, v_src));
+               return v_dst;
+            }
+
+         case Iop_Log2_64Fx2: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst = newVRegV(env);
+               addInstr(env,
+                        MIPSInstr_Msa2RF(MSA_FLOG2, MSA_F_DW, v_dst, v_src));
+               return v_dst;
+            }
+         case Iop_CmpNEZ8x16: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst = newVRegV(env);
+               HReg zero = Zero(mode64);
+               addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, zero, v_dst));
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_CEQ, MSA_B, v_dst, v_src, v_dst));
+               addInstr(env, MIPSInstr_MsaVec(MSA_NORV, v_dst, v_dst, v_dst));
+               return v_dst;
+            }
+         case Iop_CmpNEZ16x8: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst = newVRegV(env);
+               HReg zero = Zero(mode64);
+               addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, zero, v_dst));
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_CEQ, MSA_H, v_dst, v_src, v_dst));
+               addInstr(env, MIPSInstr_MsaVec(MSA_NORV, v_dst, v_dst, v_dst));
+               return v_dst;
+            }
+          case Iop_CmpNEZ32x4: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst = newVRegV(env);
+               HReg zero = Zero(mode64);
+               addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, zero, v_dst));
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_CEQ, MSA_W, v_dst, v_src, v_dst));
+               addInstr(env, MIPSInstr_MsaVec(MSA_NORV, v_dst, v_dst, v_dst));
+               return v_dst;
+            }
+          case Iop_CmpNEZ64x2: {
+               HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+               HReg v_dst = newVRegV(env);
+               HReg zero = Zero(mode64);
+               addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, zero, v_dst));
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_CEQ, MSA_D, v_dst, v_src, v_dst));
+               addInstr(env, MIPSInstr_MsaVec(MSA_NORV, v_dst, v_dst, v_dst));
+               return v_dst;
+            }
+         default:
+            vex_printf("iselV128Expr_wrk: Unsupported unop: %u\n", op_unop);
+      }
+   }
+
+   if (e->tag == Iex_Binop) {
+      IROp op_binop = e->Iex.Binop.op;
+
+      switch (op_binop) {
+         case Iop_Add8x16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ADDV, MSA_B,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Add16x8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ADDV, MSA_H,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Add32x4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ADDV, MSA_W,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Add64x2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ADDV, MSA_D,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Sub8x16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SUBV, MSA_B,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Sub16x8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SUBV, MSA_H,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Sub32x4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SUBV, MSA_W,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Sub64x2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SUBV, MSA_D,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_QAdd8Sx16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ADDS_S, MSA_B,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_QAdd16Sx8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ADDS_S, MSA_H,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_QAdd32Sx4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ADDS_S, MSA_W,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_QAdd64Sx2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ADDS_S, MSA_D,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_QAdd8Ux16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ADDS_U, MSA_B,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_QAdd16Ux8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ADDS_U, MSA_H,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_QAdd32Ux4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ADDS_U, MSA_W,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_QAdd64Ux2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ADDS_U, MSA_D,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_QSub8Sx16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SUBS_S, MSA_B,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_QSub16Sx8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SUBS_S, MSA_H,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_QSub32Sx4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SUBS_S, MSA_W,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_QSub64Sx2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SUBS_S, MSA_D,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_QSub8Ux16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SUBS_U, MSA_B,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_QSub16Ux8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SUBS_U, MSA_H,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_QSub32Ux4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SUBS_U, MSA_W,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_QSub64Ux2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SUBS_U, MSA_D,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_QDMulHi32Sx4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_MUL_Q, MSA_F_DW,
+                                         v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_QDMulHi16Sx8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_MUL_Q, MSA_F_WH,
+                                         v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_QRDMulHi32Sx4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_MULR_Q, MSA_F_DW,
+                                         v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_QRDMulHi16Sx8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_MULR_Q, MSA_F_WH,
+                                         v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Max8Sx16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_MAX_S, MSA_B,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Max16Sx8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_MAX_S, MSA_H,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Max32Sx4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_MAX_S, MSA_W,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Max64Sx2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_MAX_S, MSA_D,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Max8Ux16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_MAX_U, MSA_B,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Max16Ux8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_MAX_U, MSA_H,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Max32Ux4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_MAX_U, MSA_W,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Max64Ux2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_MAX_U, MSA_D,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Min8Sx16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_MIN_S, MSA_B,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Min16Sx8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_MIN_S, MSA_H,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Min32Sx4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_MIN_S, MSA_W,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Min64Sx2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_MIN_S, MSA_D,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Min8Ux16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_MIN_U, MSA_B,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Min16Ux8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_MIN_U, MSA_H,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Min32Ux4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_MIN_U, MSA_W,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Min64Ux2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_MIN_U, MSA_D,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Shl8x16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SLL, MSA_B, v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Shl16x8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SLL, MSA_H, v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Shl32x4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SLL, MSA_W, v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Shl64x2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SLL, MSA_D, v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Shr8x16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SRL, MSA_B, v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Shr16x8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SRL, MSA_H,  v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Shr32x4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SRL, MSA_W, v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Shr64x2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SRL, MSA_D, v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Sar8x16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SRA, MSA_B, v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Sar16x8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SRA, MSA_H, v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Sar32x4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SRA, MSA_W, v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Sar64x2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SRA, MSA_D, v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_InterleaveHI8x16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVL, MSA_B, v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_InterleaveHI16x8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVL, MSA_H,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_InterleaveHI32x4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVL, MSA_W,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_InterleaveHI64x2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVL, MSA_D,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_InterleaveLO8x16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVR, MSA_B,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_InterleaveLO16x8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVR, MSA_H,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_InterleaveLO32x4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVR, MSA_W,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_InterleaveLO64x2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVR, MSA_D,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_InterleaveEvenLanes8x16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVEV, MSA_B,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_InterleaveEvenLanes16x8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVEV, MSA_H,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_InterleaveEvenLanes32x4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVEV, MSA_W,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_InterleaveOddLanes8x16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVOD, MSA_B,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_InterleaveOddLanes16x8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVOD, MSA_H,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_InterleaveOddLanes32x4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_ILVOD, MSA_W,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_PackEvenLanes8x16: { /* one MSA_PCKEV emission on MSA_B lanes */
+               HReg v_dst = newVRegV(env); /* fresh vector register for the result */
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_PCKEV, MSA_B,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_PackEvenLanes16x8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_PCKEV, MSA_H,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_PackEvenLanes32x4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_PCKEV, MSA_W,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_PackOddLanes8x16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_PCKOD, MSA_B,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_PackOddLanes16x8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_PCKOD, MSA_H,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_PackOddLanes32x4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_PCKOD, MSA_W,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_CmpEQ8x16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_CEQ, MSA_B, v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_CmpEQ16x8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_CEQ, MSA_H, v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_CmpEQ32x4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_CEQ, MSA_W, v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_CmpEQ64x2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_CEQ, MSA_D, v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_CmpGT8Sx16: { /* all CmpGT arms emit MSA_CLT_S/MSA_CLT_U with the sources exchanged */
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_CLT_S, MSA_B,
+                                        v_dst, v_src2, v_src1)); /* deliberate swap: presumably a > b is computed as b < a */
+               return v_dst;
+            }
+
+         case Iop_CmpGT16Sx8: { /* MSA_CLT_S on MSA_H lanes */
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_CLT_S, MSA_H,
+                                        v_dst, v_src2, v_src1)); /* sources exchanged on purpose */
+               return v_dst;
+            }
+
+         case Iop_CmpGT32Sx4: { /* MSA_CLT_S on MSA_W lanes */
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_CLT_S, MSA_W,
+                                        v_dst, v_src2, v_src1)); /* sources exchanged on purpose */
+               return v_dst;
+            }
+
+         case Iop_CmpGT64Sx2: { /* MSA_CLT_S on MSA_D lanes */
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_CLT_S, MSA_D,
+                                        v_dst, v_src2, v_src1)); /* sources exchanged on purpose */
+               return v_dst;
+            }
+
+         case Iop_CmpGT8Ux16: { /* MSA_CLT_U on MSA_B lanes */
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_CLT_U, MSA_B,
+                                        v_dst, v_src2, v_src1)); /* sources exchanged on purpose */
+               return v_dst;
+            }
+
+         case Iop_CmpGT16Ux8: { /* MSA_CLT_U on MSA_H lanes */
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_CLT_U, MSA_H,
+                                        v_dst, v_src2, v_src1)); /* sources exchanged on purpose */
+               return v_dst;
+            }
+
+         case Iop_CmpGT32Ux4: { /* MSA_CLT_U on MSA_W lanes */
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_CLT_U, MSA_W,
+                                        v_dst, v_src2, v_src1)); /* sources exchanged on purpose */
+               return v_dst;
+            }
+
+         case Iop_CmpGT64Ux2: { /* MSA_CLT_U on MSA_D lanes */
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_CLT_U, MSA_D,
+                                        v_dst, v_src2, v_src1)); /* sources exchanged on purpose */
+               return v_dst;
+            }
+
+         case Iop_Avg8Sx16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_AVER_S, MSA_B,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Avg16Sx8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_AVER_S, MSA_H,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Avg32Sx4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_AVER_S, MSA_W,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Avg8Ux16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_AVER_U, MSA_B,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Avg16Ux8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_AVER_U, MSA_H,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Avg32Ux4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_AVER_U, MSA_W,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Mul8x16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_MULV, MSA_B,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Mul16x8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_MULV, MSA_H,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Mul32x4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_MULV, MSA_W,
+                                        v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_AndV128: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env, MIPSInstr_MsaVec(MSA_ANDV, v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_OrV128: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env, MIPSInstr_MsaVec(MSA_ORV, v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_XorV128: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env, MIPSInstr_MsaVec(MSA_XORV, v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_ShrV128: { /* lowered as a byte-wise slide (SLDI/SLD) against a zeroed vector */
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               MIPSRH *sm; /* shift amount (arg2): immediate form or register form */
+               sm = iselWordExpr_RH7u(env, e->Iex.Binop.arg2);
+               addInstr(env, /* v_dst = v_src1 - v_src1, i.e. every byte is zero */
+                        MIPSInstr_Msa3R(MSA_SUBV, MSA_B,
+                                        v_dst, v_src1, v_src1));
+
+               if (sm->tag == Mrh_Imm) {
+                  int n = (sm->Mrh.Imm.imm16) >> 3; /* divide by 8; the low 3 bits of the amount are dropped */
+                  addInstr(env, /* NOTE(review): (v_src1, v_dst) order differs from the Msa3R calls; presumably MsaElm takes source before destination - confirm */
+                           MIPSInstr_MsaElm(MSA_SLDI, v_src1, v_dst,
+                                            MSA_DFN_B | n));
+               } else {
+                  HReg v_src2 = sm->Mrh.Reg.reg; /* register form of the amount; fed to the integer shift below despite the v_ prefix */
+                  MIPSRH *ri = MIPSRH_Imm(False, 3); /* constant 3 used as the SRL shift count */
+                  HReg r_dst = newVRegI(env);
+                  addInstr(env, MIPSInstr_Shft(Mshft_SRL, True /*32bit shift */, /* r_dst = amount >> 3 */
+                                               r_dst, v_src2, ri));
+                  addInstr(env, /* slide variant that takes its count from r_dst */
+                           MIPSInstr_Msa3R(MSA_SLD, MSA_B,
+                                           v_dst, v_src1, r_dst));
+               }
+
+               return v_dst; /* both paths leave the result in v_dst */
+            }
+
+         case Iop_ShlV128: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               MIPSRH *sm;
+               sm = iselWordExpr_RH7u(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3R(MSA_SUBV, MSA_B,
+                                        v_dst, v_src1, v_src1));
+
+               if (sm->tag == Mrh_Imm) {
+                  int n = 16 - ((sm->Mrh.Imm.imm16) >> 3);
+
+                  if (n == 16) n = 0;
+
+                  addInstr(env,
+                           MIPSInstr_MsaElm(MSA_SLDI, v_dst, v_src1,
+                                            MSA_DFN_B | n));
+               } else {
+                  HReg v_src2 = sm->Mrh.Reg.reg;
+                  MIPSRH *ri = MIPSRH_Imm(False, 3);
+                  HReg r_dst = newVRegI(env);
+                  HReg help = newVRegI(env);
+                  addInstr(env, MIPSInstr_Alu(Malu_XOR, help, v_src2, sm));
+                  addInstr(env, MIPSInstr_Alu(Malu_SUB, help, help, sm));
+                  addInstr(env, MIPSInstr_Shft(Mshft_SRL, True /*32bit shift */,
+                                               r_dst, help, ri));
+                  addInstr(env,
+                           MIPSInstr_Msa3R(MSA_SLD, MSA_B,
+                                           v_src1, v_dst, r_dst));
+               }
+
+               return v_src1;
+            }
+
+         case Iop_ShlN8x16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               vassert(e->Iex.Binop.arg2->tag == Iex_Const);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63);
+               addInstr(env,
+                        MIPSInstr_MsaBit(MSA_SLLI, MSA_B,
+                                         e->Iex.Binop.arg2->Iex.Const.con->Ico.U8,
+                                         v_src1, v_dst));
+               return v_dst;
+            }
+
+         case Iop_ShlN16x8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               vassert(e->Iex.Binop.arg2->tag == Iex_Const);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63);
+               addInstr(env,
+                        MIPSInstr_MsaBit(MSA_SLLI, MSA_H,
+                                         e->Iex.Binop.arg2->Iex.Const.con->Ico.U8,
+                                         v_src1, v_dst));
+               return v_dst;
+            }
+
+         case Iop_ShlN32x4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               vassert(e->Iex.Binop.arg2->tag == Iex_Const);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63);
+               addInstr(env,
+                        MIPSInstr_MsaBit(MSA_SLLI, MSA_W,
+                                         e->Iex.Binop.arg2->Iex.Const.con->Ico.U8,
+                                         v_src1, v_dst));
+               return v_dst;
+            }
+
+         case Iop_ShlN64x2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               vassert(e->Iex.Binop.arg2->tag == Iex_Const);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63);
+               addInstr(env,
+                        MIPSInstr_MsaBit(MSA_SLLI, MSA_D,
+                                         e->Iex.Binop.arg2->Iex.Const.con->Ico.U8,
+                                         v_src1, v_dst));
+               return v_dst;
+            }
+
+         case Iop_SarN8x16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               vassert(e->Iex.Binop.arg2->tag == Iex_Const);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63);
+               addInstr(env,
+                        MIPSInstr_MsaBit(MSA_SRAI, MSA_B,
+                                         e->Iex.Binop.arg2->Iex.Const.con->Ico.U8,
+                                         v_src1, v_dst));
+               return v_dst;
+            }
+
+         case Iop_SarN16x8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               vassert(e->Iex.Binop.arg2->tag == Iex_Const);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63);
+               addInstr(env,
+                        MIPSInstr_MsaBit(MSA_SRAI, MSA_H,
+                                         e->Iex.Binop.arg2->Iex.Const.con->Ico.U8,
+                                         v_src1, v_dst));
+               return v_dst;
+            }
+
+         case Iop_SarN32x4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               vassert(e->Iex.Binop.arg2->tag == Iex_Const);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63);
+               addInstr(env,
+                        MIPSInstr_MsaBit(MSA_SRAI, MSA_W,
+                                         e->Iex.Binop.arg2->Iex.Const.con->Ico.U8,
+                                         v_src1, v_dst));
+               return v_dst;
+            }
+
+         case Iop_SarN64x2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               vassert(e->Iex.Binop.arg2->tag == Iex_Const);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63);
+               addInstr(env,
+                        MIPSInstr_MsaBit(MSA_SRAI, MSA_D,
+                                         e->Iex.Binop.arg2->Iex.Const.con->Ico.U8,
+                                         v_src1, v_dst));
+               return v_dst;
+            }
+
+         case Iop_ShrN8x16: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               vassert(e->Iex.Binop.arg2->tag == Iex_Const);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63);
+               addInstr(env,
+                        MIPSInstr_MsaBit(MSA_SRLI, MSA_B,
+                                         e->Iex.Binop.arg2->Iex.Const.con->Ico.U8,
+                                         v_src1, v_dst));
+               return v_dst;
+            }
+
+         case Iop_ShrN16x8: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               vassert(e->Iex.Binop.arg2->tag == Iex_Const);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63);
+               addInstr(env,
+                        MIPSInstr_MsaBit(MSA_SRLI, MSA_H,
+                                         e->Iex.Binop.arg2->Iex.Const.con->Ico.U8,
+                                         v_src1, v_dst));
+               return v_dst;
+            }
+
+         case Iop_ShrN32x4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               vassert(e->Iex.Binop.arg2->tag == Iex_Const);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63);
+               addInstr(env,
+                        MIPSInstr_MsaBit(MSA_SRLI, MSA_W,
+                                         e->Iex.Binop.arg2->Iex.Const.con->Ico.U8,
+                                         v_src1, v_dst));
+               return v_dst;
+            }
+
+         case Iop_ShrN64x2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               vassert(e->Iex.Binop.arg2->tag == Iex_Const);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63);
+               addInstr(env,
+                        MIPSInstr_MsaBit(MSA_SRLI, MSA_D,
+                                         e->Iex.Binop.arg2->Iex.Const.con->Ico.U8,
+                                         v_src1, v_dst));
+               return v_dst;
+            }
+
+         case Iop_QandQSarNnarrow64Sto32Sx2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               vassert(e->Iex.Binop.arg2->tag == Iex_Const);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63);
+               addInstr(env,
+                        MIPSInstr_MsaBit(MSA_SRAI, MSA_D,
+                                         e->Iex.Binop.arg2->Iex.Const.con->Ico.U8,
+                                         v_src1, v_dst));
+               addInstr(env, MIPSInstr_MsaBit(MSA_SAT_S, MSA_D, 31, v_dst, v_dst));
+               return v_dst;
+            }
+
+         case Iop_QandQSarNnarrow32Sto16Sx4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               vassert(e->Iex.Binop.arg2->tag == Iex_Const);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63);
+               addInstr(env,
+                        MIPSInstr_MsaBit(MSA_SRAI, MSA_W,
+                                         e->Iex.Binop.arg2->Iex.Const.con->Ico.U8,
+                                         v_src1, v_dst));
+               addInstr(env,
+                        MIPSInstr_MsaBit(MSA_SAT_S, MSA_W, 15, v_dst, v_dst));
+               return v_dst;
+            }
+
+         case Iop_QandQRSarNnarrow64Sto32Sx2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               vassert(e->Iex.Binop.arg2->tag == Iex_Const);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63);
+               addInstr(env,
+                        MIPSInstr_MsaBit(MSA_SRARI, MSA_D,
+                                         e->Iex.Binop.arg2->Iex.Const.con->Ico.U8,
+                                         v_src1, v_dst));
+               addInstr(env,
+                        MIPSInstr_MsaBit(MSA_SAT_S, MSA_D, 31, v_dst, v_dst));
+               return v_dst;
+            }
+
+         case Iop_QandQRSarNnarrow32Sto16Sx4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               vassert(e->Iex.Binop.arg2->tag == Iex_Const);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
+               vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63);
+               addInstr(env,
+                        MIPSInstr_MsaBit(MSA_SRARI, MSA_W,
+                                         e->Iex.Binop.arg2->Iex.Const.con->Ico.U8,
+                                         v_src1, v_dst));
+               addInstr(env,
+                        MIPSInstr_MsaBit(MSA_SAT_S, MSA_W, 15, v_dst, v_dst));
+               return v_dst;
+            }
+
+         case Iop_CmpEQ32Fx4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FCEQ, MSA_F_WH,
+                                         v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_CmpEQ64Fx2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FCEQ, MSA_F_DW,
+                                         v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_CmpLT32Fx4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FCLT, MSA_F_WH,
+                                         v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_CmpLT64Fx2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FCLT, MSA_F_DW,
+                                         v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_CmpLE32Fx4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FCLE, MSA_F_WH,
+                                         v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_CmpLE64Fx2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FCLE, MSA_F_DW,
+                                         v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_CmpUN32Fx4: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FCUN, MSA_F_WH,
+                                         v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_CmpUN64Fx2: {
+               HReg v_dst = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FCUN, MSA_F_DW,
+                                         v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_64HLtoV128: {
+               HReg v_dst = newVRegV(env);
+
+               if (mode64) {
+                  HReg r_src1;
+                  HReg r_src2;
+                  r_src1 = iselWordExpr_R(env, e->Iex.Binop.arg1);
+                  r_src2 = iselWordExpr_R(env, e->Iex.Binop.arg2);
+                  addInstr(env,
+                           MIPSInstr_Msa2R(MSA_FILL, MSA_D, r_src2, v_dst));
+                  addInstr(env,
+                           MIPSInstr_MsaElm(MSA_INSERT, r_src1, v_dst,
+                                            MSA_DFN_D | 1));
+               } else {
+                  HReg r_src1h, r_src1l;
+                  HReg r_src2h, r_src2l;
+                  iselInt64Expr(&r_src1h, &r_src1l, env, e->Iex.Binop.arg1);
+                  iselInt64Expr(&r_src2h, &r_src2l, env, e->Iex.Binop.arg2);
+                  addInstr(env,
+                           MIPSInstr_Msa2R(MSA_FILL, MSA_W, r_src2l, v_dst));
+                  addInstr(env,
+                           MIPSInstr_MsaElm(MSA_INSERT, r_src2h, v_dst,
+                                            MSA_DFN_W | 1));
+                  addInstr(env,
+                           MIPSInstr_MsaElm(MSA_INSERT, r_src1l, v_dst,
+                                            MSA_DFN_W | 2));
+                  addInstr(env,
+                           MIPSInstr_MsaElm(MSA_INSERT, r_src1h, v_dst,
+                                            MSA_DFN_W | 3));
+               }
+
+               return v_dst;
+            }
+
+         case Iop_Min32Fx4: {
+               HReg v_src1  = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2  = iselV128Expr(env, e->Iex.Binop.arg2);
+               HReg v_dst = newVRegV(env);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FMIN, MSA_F_WH,
+                                         v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Min64Fx2: {
+               HReg v_src1  = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2  = iselV128Expr(env, e->Iex.Binop.arg2);
+               HReg v_dst = newVRegV(env);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FMIN, MSA_F_DW,
+                                         v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Max32Fx4: {
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               HReg v_dst  = newVRegV(env);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FMAX, MSA_F_WH,
+                                         v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Max64Fx2: {
+               HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2);
+               HReg v_dst  = newVRegV(env);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FMAX, MSA_F_DW,
+                                         v_dst, v_src1, v_src2));
+               return v_dst;
+            }
+
+         case Iop_Sqrt32Fx4: {
+               HReg v_src = iselV128Expr(env, e->Iex.Binop.arg2);
+               HReg v_dst = newVRegV(env);
+               set_MIPS_rounding_mode_MSA(env, e->Iex.Binop.arg1);
+               addInstr(env,
+                        MIPSInstr_Msa2RF(MSA_FSQRT, MSA_F_WH, v_dst, v_src));
+               set_MIPS_rounding_default_MSA(env);
+               return v_dst;
+            }
+
+         case Iop_Sqrt64Fx2: {
+               HReg v_src = iselV128Expr(env, e->Iex.Binop.arg2);
+               HReg v_dst = newVRegV(env);
+               set_MIPS_rounding_mode_MSA(env, e->Iex.Binop.arg1);
+               addInstr(env,
+                        MIPSInstr_Msa2RF(MSA_FSQRT, MSA_F_DW, v_dst, v_src));
+               set_MIPS_rounding_default_MSA(env);
+               return v_dst;
+            }
+
+         default:
+            vex_printf("iselV128Expr_wrk: unsupported binop: %x\n", op_binop);
+      }
+   }
+
+   if (e->tag == Iex_Triop) {
+      IROp op_triop = e->Iex.Triop.details->op;
+
+      switch (op_triop) {
+         case Iop_Add32Fx4: {
+               HReg v_dst  = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3);
+               set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FADD, MSA_F_WH,
+                                         v_dst, v_src1, v_src2));
+               set_MIPS_rounding_default_MSA(env);
+               return v_dst;
+            }
+
+         case Iop_Add64Fx2: {
+               HReg v_dst  = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3);
+               set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FADD, MSA_F_DW,
+                                         v_dst, v_src1, v_src2));
+               set_MIPS_rounding_default_MSA(env);
+               return v_dst;
+            }
+
+         case Iop_Sub32Fx4: {
+               HReg v_dst  = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3);
+               set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FSUB, MSA_F_WH,
+                                         v_dst, v_src1, v_src2));
+               set_MIPS_rounding_default_MSA(env);
+               return v_dst;
+            }
+
+         case Iop_Sub64Fx2: {
+               HReg v_dst  = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3);
+               set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FSUB, MSA_F_DW,
+                                         v_dst, v_src1, v_src2));
+               set_MIPS_rounding_default_MSA(env);
+               return v_dst;
+            }
+
+         case Iop_Mul32Fx4: {
+               HReg v_dst  = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3);
+               set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FMUL, MSA_F_WH,
+                                         v_dst, v_src1, v_src2));
+               set_MIPS_rounding_default_MSA(env);
+               return v_dst;
+            }
+
+         case Iop_Mul64Fx2: {
+               HReg v_dst  = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3);
+               set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FMUL, MSA_F_DW,
+                                         v_dst, v_src1, v_src2));
+               set_MIPS_rounding_default_MSA(env);
+               return v_dst;
+            }
+
+         case Iop_Div32Fx4: {
+               HReg v_dst  = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3);
+               set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FDIV, MSA_F_WH,
+                                         v_dst, v_src1, v_src2));
+               set_MIPS_rounding_default_MSA(env);
+               return v_dst;
+            }
+
+         case Iop_Div64Fx2: {
+               HReg v_dst  = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3);
+               set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FDIV, MSA_F_DW,
+                                         v_dst, v_src1, v_src2));
+               set_MIPS_rounding_default_MSA(env);
+               return v_dst;
+            }
+
+         case Iop_F32x4_2toQ16x8: {
+               HReg v_dst  = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3);
+               set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FTQ, MSA_F_WH,
+                                         v_dst, v_src1, v_src2));
+               set_MIPS_rounding_default_MSA(env);
+               return v_dst;
+            }
+
+         case Iop_F64x2_2toQ32x4: {
+               HReg v_dst  = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3);
+               set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FTQ, MSA_F_DW,
+                                         v_dst, v_src1, v_src2));
+               set_MIPS_rounding_default_MSA(env);
+               return v_dst;
+            }
+
+          case Iop_Scale2_32Fx4: {
+               HReg v_dst  = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3);
+               set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FEXP2, MSA_F_WH,
+                                         v_dst, v_src1, v_src2));
+               set_MIPS_rounding_default_MSA(env);
+               return v_dst;
+            }
+
+          case Iop_Scale2_64Fx2: {
+               HReg v_dst  = newVRegV(env);
+               HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2);
+               HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3);
+               set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1);
+               addInstr(env,
+                        MIPSInstr_Msa3RF(MSA_FEXP2, MSA_F_DW,
+                                         v_dst, v_src1, v_src2));
+               set_MIPS_rounding_default_MSA(env);
+               return v_dst;
+            }
+
+         default:
+            vex_printf("iselV128Expr_wrk: unsupported triop: %x\n", op_triop);
+      }
+   }
+
+   if (e->tag == Iex_Const) {
+      IRConst *con = e->Iex.Const.con;
+
+      if (con->tag != Ico_V128) {
+         vpanic("iselV128Expr.const(mips)");
+      } else {
+         HReg v_dst = newVRegV(env);
+         UShort val = con->Ico.V128;
+         HReg zero = Zero(mode64);
+
+         switch (val) {
+            case 0:  /* likely */
+               addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, zero, v_dst));
+               break;
+
+            default: {
+                  HReg r_tmp = newVRegI(env);
+                  UInt i;
+                  addInstr(env, MIPSInstr_LI(r_tmp, 0xfful));
+
+                  if (val & 1) {
+                     addInstr(env,
+                              MIPSInstr_Msa2R(MSA_FILL, MSA_B, r_tmp, v_dst));
+                  } else {
+                     addInstr(env,
+                              MIPSInstr_Msa2R(MSA_FILL, MSA_B, zero, v_dst));
+                  }
+
+                  for (i = 1; i < 16; i++) {
+                     val >>= 1;
+
+                     if (val & 1) {
+                        addInstr(env,
+                                 MIPSInstr_MsaElm(MSA_INSERT, r_tmp, v_dst,
+                                                  MSA_DFN_B | i));
+                     } else {
+                        addInstr(env,
+                                 MIPSInstr_MsaElm(MSA_INSERT, zero, v_dst,
+                                                  MSA_DFN_B | i));
+                     }
+                  }
+
+                  break;
+               }
+         }
+
+         return v_dst;
+      }
+   }
+
+   if (e->tag == Iex_ITE) {
+      HReg v_dst  = newVRegV(env);
+      HReg iff    = iselV128Expr(env, e->Iex.ITE.iffalse);
+      HReg ift    = iselV128Expr(env, e->Iex.ITE.iftrue);
+      HReg r_cond = iselWordExpr_R(env, e->Iex.ITE.cond);
+      addInstr(env, MIPSInstr_Shft(Mshft_SLL, True, r_cond, r_cond,
+                                   MIPSRH_Imm(False, 1)));
+      addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, r_cond, v_dst));
+      addInstr(env,
+               MIPSInstr_Alu(Malu_ADD, r_cond, r_cond, MIPSRH_Imm(True, 1)));
+      addInstr(env, MIPSInstr_MsaElm(MSA_INSERT, r_cond, v_dst, MSA_DFN_W | 2));
+      addInstr(env, MIPSInstr_Msa3R(MSA_VSHF, MSA_D, v_dst, ift, iff));
+      return v_dst;
+   }
+
+   vex_printf("iselV128Expr_wrk: Unsupported tag: %x\n", e->tag);
+   ppIRExpr(e);
+   vpanic("iselV128Expr(mips)");
+}
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Integer expressions (128 bit)               ---*/
+/*---------------------------------------------------------*/
+
+/* 64-bit mode ONLY: compute a 128-bit value into a register pair,
+   which is returned as the first two parameters.  As with
+   iselWordExpr_R, these may be either real or virtual regs; in any
+   case they must not be changed by subsequent code emitted by the
+   caller.  */
+
+static void iselInt128Expr(HReg * rHi, HReg * rLo, ISelEnv * env, IRExpr * e)
+{ /* Checked entry point: runs the worker, then validates both result regs. */
+   vassert(env->mode64);                  /* refuse to run outside 64-bit mode */
+   iselInt128Expr_wrk(rHi, rLo, env, e);  /* worker fills in *rHi and *rLo */
+   vassert(hregClass(*rHi) == HRcGPR(env->mode64)); /* high half: GPR class */
+   vassert(hregIsVirtual(*rHi));                    /* ... and a virtual reg */
+   vassert(hregClass(*rLo) == HRcGPR(env->mode64)); /* low half: GPR class */
+   vassert(hregIsVirtual(*rLo));                    /* ... and a virtual reg */
+}
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static void iselInt128Expr_wrk(HReg * rHi, HReg * rLo, ISelEnv * env,
+                               IRExpr * e)
+{
+   vassert(e);
+   vassert(typeOfIRExpr(env->type_env, e) == Ity_I128);
+
+   /* read 128-bit IRTemp */
+   if (e->tag == Iex_RdTmp) {
+      lookupIRTempPair(rHi, rLo, env, e->Iex.RdTmp.tmp);
+      return;
+   }
+
+   /* --------- BINARY ops --------- */
+   if (e->tag == Iex_Binop) {
+      switch (e->Iex.Binop.op) {
+         /* 64 x 64 -> 128 multiply */
+         case Iop_MullU64:
+         case Iop_MullS64: {
+            HReg tLo = newVRegI(env);
+            HReg tHi = newVRegI(env);
+            Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
+            HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
+            HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
+            addInstr(env, MIPSInstr_Mult(syned, r_srcL, r_srcR));
+            addInstr(env, MIPSInstr_Mfhi(tHi));
+            addInstr(env, MIPSInstr_Mflo(tLo));
+            *rHi = tHi;
+            *rLo = tLo;
+            return;
+         }
+
+         /* 64HLto128(e1,e2) */
+         case Iop_64HLto128:
+            *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1);
+            *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2);
+            return;
+
+         case Iop_DivModU64to64:
+         case Iop_DivModS64to64: {
+            HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
+            HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
+            HReg tLo = newVRegI(env);
+            HReg tHi = newVRegI(env);
+            Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to64);
+
+            addInstr(env, MIPSInstr_Div(syned, False, r_srcL, r_srcR));
+            addInstr(env, MIPSInstr_Mfhi(tHi));
+            addInstr(env, MIPSInstr_Mflo(tLo));
+            *rHi = tHi;
+            *rLo = tLo;
+            return;
+         }
 
          default:
             break;
@@ -2344,6 +4787,27 @@ static void iselInt64Expr_wrk(HReg * rHi, HReg * rLo, ISelEnv * env, IRExpr * e)
       return;
    }
 
+   if (e->tag == Iex_CCall) {
+      HReg r_dstH = newVRegI(env);
+      HReg r_dstL = newVRegI(env);
+      vassert(e->Iex.CCall.retty == Ity_I64);
+
+      /* Marshal args, do the call, clear stack. */
+      UInt   addToSp = 0;
+      RetLoc rloc    = mk_RetLoc_INVALID();
+      doHelperCall(&addToSp, &rloc, env, NULL/*guard*/, e->Iex.CCall.cee,
+                   e->Iex.CCall.retty, e->Iex.CCall.args );
+
+      vassert(is_sane_RetLoc(rloc));
+      vassert(rloc.pri == RLPri_2Int);
+      vassert(addToSp == 0);
+      addInstr(env, mk_iMOVds_RR(r_dstL, hregMIPS_GPR2(False)));
+      addInstr(env, mk_iMOVds_RR(r_dstH, hregMIPS_GPR3(False)));
+      *rHi = r_dstH;
+      *rLo = r_dstL;
+      return;
+   }
+
    /* --------- BINARY ops --------- */
    if (e->tag == Iex_Binop) {
       IROp op_binop = e->Iex.Binop.op;
@@ -2765,6 +5229,129 @@ static void iselInt64Expr_wrk(HReg * rHi, HReg * rLo, ISelEnv * env, IRExpr * e)
 
             return;
          }
+         case Iop_F64toI64U: {
+            HReg r_src;
+            HReg tmp = newVRegV(env);
+            vassert(has_msa);
+            r_src = iselDblExpr( env, e->Iex.Binop.arg2);
+            set_MIPS_rounding_mode_MSA(env, e->Iex.Binop.arg1);
+            addInstr(env, MIPSInstr_Msa2RF(MSA_FTINT_U, MSA_F_DW, tmp, r_src));
+            HReg r_dsth = newVRegI(env);
+            HReg r_dstl = newVRegI(env);
+            addInstr(env,
+                     MIPSInstr_MsaElm(MSA_COPY_S, tmp, r_dstl, MSA_DFN_W | 0));
+            addInstr(env,
+                     MIPSInstr_MsaElm(MSA_COPY_S, tmp, r_dsth, MSA_DFN_W | 1));
+            *rHi = r_dsth;
+            *rLo = r_dstl;
+            set_MIPS_rounding_default_MSA(env);
+            return;
+         }
+
+         case Iop_GetElem64x2: {
+            vassert(has_msa);
+            HReg v_src = iselV128Expr(env, e->Iex.Binop.arg1);
+            HReg r_dstHI = newVRegI(env);
+            HReg r_dstLO = newVRegI(env);
+            MIPSRH *tmp = iselWordExpr_RH(env, False, e->Iex.Binop.arg2);
+
+            switch (tmp->tag) {
+               case Mrh_Imm:
+                  addInstr(env,
+                           MIPSInstr_MsaElm(MSA_COPY_S, v_src, r_dstHI,
+                                            MSA_DFN_W |
+                                            (((tmp->Mrh.Imm.imm16 & 0x01) << 1)
+                                            + 1)));
+                  addInstr(env,
+                           MIPSInstr_MsaElm(MSA_COPY_S, v_src, r_dstLO,
+                                            MSA_DFN_W |
+                                            ((tmp->Mrh.Imm.imm16 & 0x01) << 1)));
+                  break;
+
+               case Mrh_Reg: {
+                     HReg v_tmp = newVRegV(env);
+                     addInstr(env,
+                              MIPSInstr_Msa3R(MSA_SPLAT, MSA_D, v_tmp, v_src,
+                                              tmp->Mrh.Reg.reg));
+                     addInstr(env,
+                              MIPSInstr_MsaElm(MSA_COPY_S, v_tmp, r_dstHI,
+                                               MSA_DFN_W | 1));
+                     addInstr(env,
+                              MIPSInstr_MsaElm(MSA_COPY_S, v_tmp, r_dstLO,
+                                               MSA_DFN_W));
+                     break;
+                  }
+            }
+
+            *rHi = r_dstHI;
+            *rLo = r_dstLO;
+            return;
+         }
+
+         case Iop_Mul64: {
+            HReg a_L, a_H, b_L, b_H;
+            HReg dst_L = newVRegI(env);
+            HReg dst_H = newVRegI(env);
+
+            iselInt64Expr(&a_H, &a_L, env, e->Iex.Binop.arg1);
+            iselInt64Expr(&b_H, &b_L, env, e->Iex.Binop.arg2);
+            addInstr(env, MIPSInstr_Mul(dst_H, a_H, b_L));
+            addInstr(env, MIPSInstr_Mult(True, b_H, a_L));
+            addInstr(env, MIPSInstr_Mflo(dst_L));
+            addInstr(env, MIPSInstr_Alu(Malu_ADD, dst_H, dst_H,
+                                        MIPSRH_Reg(dst_L)));
+            addInstr(env, MIPSInstr_Mult(False, a_L, b_L));
+            addInstr(env, MIPSInstr_Mfhi(dst_L));
+
+            addInstr(env, MIPSInstr_Alu(Malu_ADD, dst_H, dst_H,
+                                        MIPSRH_Reg(dst_L)));
+            addInstr(env, MIPSInstr_Mflo(dst_L));
+            *rHi = dst_H;
+            *rLo = dst_L;
+            return;
+         }
+
+         case Iop_DivS64: {
+            HReg src1_L, src1_H, src2_L, src2_H;
+            HReg dst_L = newVRegI(env);
+            HReg dst_H = newVRegI(env);
+            HReg tmp1 = newVRegV(env);
+            HReg tmp2 = newVRegV(env);
+            vassert(has_msa);
+            iselInt64Expr(&src1_H, &src1_L, env, e->Iex.Binop.arg1);
+            iselInt64Expr(&src2_H, &src2_L, env, e->Iex.Binop.arg2);
+            addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, src1_L, tmp1));
+            addInstr(env, MIPSInstr_MsaElm(MSA_INSERT, src1_H, tmp1, MSA_DFN_W | 1));
+            addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, src2_L, tmp2));
+            addInstr(env, MIPSInstr_MsaElm(MSA_INSERT, src2_H, tmp2, MSA_DFN_W | 1));
+            addInstr(env, MIPSInstr_Msa3R(MSA_DIVS, MSA_D, tmp1, tmp1, tmp2));
+            addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, tmp1, dst_H, MSA_DFN_W | 1));
+            addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, tmp1, dst_L, MSA_DFN_W | 0));
+            *rHi = dst_H;
+            *rLo = dst_L;
+            return;
+         }
+
+         case Iop_DivU64: {
+            HReg src1_L, src1_H, src2_L, src2_H;
+            HReg dst_L = newVRegI(env);
+            HReg dst_H = newVRegI(env);
+            HReg tmp1 = newVRegV(env);
+            HReg tmp2 = newVRegV(env);
+            vassert(has_msa);
+            iselInt64Expr(&src1_H, &src1_L, env, e->Iex.Binop.arg1);
+            iselInt64Expr(&src2_H, &src2_L, env, e->Iex.Binop.arg2);
+            addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, src1_L, tmp1));
+            addInstr(env, MIPSInstr_MsaElm(MSA_INSERT, src1_H, tmp1, MSA_DFN_W | 1));
+            addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, src2_L, tmp2));
+            addInstr(env, MIPSInstr_MsaElm(MSA_INSERT, src2_H, tmp2, MSA_DFN_W | 1));
+            addInstr(env, MIPSInstr_Msa3R(MSA_DIVU, MSA_D, tmp1, tmp1, tmp2));
+            addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, tmp1, dst_H, MSA_DFN_W | 1));
+            addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, tmp1, dst_L, MSA_DFN_W | 0));
+            *rHi = dst_H;
+            *rLo = dst_L;
+            return;
+         }
 
          default:
             break;
@@ -2793,6 +5380,25 @@ static void iselInt64Expr_wrk(HReg * rHi, HReg * rLo, ISelEnv * env, IRExpr * e)
             return;
          }
 
+         case Iop_8Sto64:
+         case Iop_16Sto64: {
+            HReg tLo = newVRegI(env);
+            HReg tHi = newVRegI(env);
+            HReg src = iselWordExpr_R(env, e->Iex.Unop.arg);
+            UInt no_bits = (e->Iex.Unop.op == Iop_8Sto64) ? 24 : 16;
+            addInstr(env, mk_iMOVds_RR(tLo, src));
+            addInstr(env, MIPSInstr_Shft(Mshft_SLL, True, tLo, tLo,
+                          MIPSRH_Imm(False, no_bits)));
+            addInstr(env, MIPSInstr_Shft(Mshft_SRA, True, tHi, tLo,
+                          MIPSRH_Imm(False, 31)));
+            addInstr(env, MIPSInstr_Shft(Mshft_SRA, True, tLo, tLo,
+                          MIPSRH_Imm(False, no_bits)));
+            /* tHi already holds the sign fill; it must not be overwritten. */
+            *rHi = tHi;
+            *rLo = tLo;
+            return;
+         }
+
          /* 32Sto64(e) */
          case Iop_32Sto64: {
             HReg tLo = newVRegI(env);
@@ -2807,13 +5413,14 @@ static void iselInt64Expr_wrk(HReg * rHi, HReg * rLo, ISelEnv * env, IRExpr * e)
             return;
          }
 
-         /* 8Uto64(e) */
-         case Iop_8Uto64: {
+         case Iop_8Uto64:
+         case Iop_16Uto64: {
             HReg tLo = newVRegI(env);
             HReg tHi = newVRegI(env);
             HReg src = iselWordExpr_R(env, e->Iex.Unop.arg);
+            UInt mask = (e->Iex.Unop.op == Iop_8Uto64) ? 0xFF : 0xFFFF;
             addInstr(env, MIPSInstr_Alu(Malu_AND, tLo, src,
-                                        MIPSRH_Imm(False, 0xFF)));
+                                        MIPSRH_Imm(False, mask)));
             addInstr(env, MIPSInstr_Alu(Malu_ADD, tHi, hregMIPS_GPR0(mode64),
                                         MIPSRH_Reg(hregMIPS_GPR0(mode64))));
             *rHi = tHi;
@@ -2927,7 +5534,46 @@ static void iselInt64Expr_wrk(HReg * rHi, HReg * rLo, ISelEnv * env, IRExpr * e)
 
             *rHi = tHi;
             *rLo = tLo;
+            return;
+         }
+
+         case Iop_V128HIto64: {
+            vassert(has_msa);
+            HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+            HReg tLo = newVRegI(env);
+            HReg tHi = newVRegI(env);
+            addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, v_src, tLo, MSA_DFN_W | 2));
+            addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, v_src, tHi, MSA_DFN_W | 3));
+            *rLo = tLo;
+            *rHi = tHi;
+            return;
+         }
+
+         case Iop_V128to64: {
+            vassert(has_msa);
+            HReg v_src = iselV128Expr(env, e->Iex.Unop.arg);
+            HReg tLo = newVRegI(env);
+            HReg tHi = newVRegI(env);
+            addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, v_src, tLo, MSA_DFN_W | 0));
+            addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, v_src, tHi, MSA_DFN_W | 1));
+            *rLo = tLo;
+            *rHi = tHi;
+            return;
+         }
 
+         case Iop_F32toF16x4: {
+            vassert(has_msa);
+            HReg v_arg = iselV128Expr(env, e->Iex.Unop.arg);
+            HReg v_src = newVRegV(env);
+            set_guest_MIPS_rounding_mode_MSA(env);
+            addInstr(env, MIPSInstr_Msa3RF(MSA_FEXDO, MSA_F_WH, v_src, v_arg, v_arg));
+            set_MIPS_rounding_default_MSA(env);
+            HReg tLo = newVRegI(env);
+            HReg tHi = newVRegI(env);
+            addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, v_src, tLo, MSA_DFN_W | 0));
+            addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, v_src, tHi, MSA_DFN_W | 1));
+            *rLo = tLo;
+            *rHi = tHi;
             return;
          }
 
@@ -3148,6 +5794,19 @@ static HReg iselFltExpr_wrk(ISelEnv * env, IRExpr * e)
             set_MIPS_rounding_default(env);
             return dst;
          }
+         case Iop_ScaleF64: {
+            HReg src1   = iselFltExpr(env, e->Iex.Triop.details->arg2);
+            HReg src2   = iselFltExpr(env, e->Iex.Triop.details->arg3);
+            HReg v_help = newVRegV(env);
+            HReg dst    = newVRegF(env);
+            vassert(has_msa);
+            set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1);
+            addInstr(env, MIPSInstr_Msa2RF(MSA_FTINT_S, MSA_F_DW, v_help, src2));
+            addInstr(env, MIPSInstr_Msa3RF(MSA_FEXP2, MSA_F_DW, dst, src1, v_help));
+            set_MIPS_rounding_default_MSA(env);
+
+            return dst;
+         }
          default:
             break;
       }
@@ -3283,6 +5942,34 @@ static HReg iselFltExpr_wrk(ISelEnv * env, IRExpr * e)
             return dst;
          }
 
+         case Iop_I64UtoF64: {
+            vassert(mode64);
+            HReg r_dst = newVRegF(env);
+            HReg tmp = newVRegV(env);
+            HReg r_src;
+            vassert(has_msa);
+            r_src = iselWordExpr_R(env, e->Iex.Binop.arg2);
+            set_MIPS_rounding_mode_MSA(env, e->Iex.Binop.arg1);
+            addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_D, r_src, tmp));
+            HReg r_srch = newVRegI(env);
+            addInstr(env, MIPSInstr_Msa2RF(MSA_FFINT_U, MSA_F_DW, tmp, tmp));
+            addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, tmp, r_srch, MSA_DFN_D | 0));
+            sub_from_sp(env, 8);
+            MIPSAMode *am_addr = MIPSAMode_IR(0, StackPointer(mode64));
+
+            /* store as I64 */
+            addInstr(env, MIPSInstr_Store(8, am_addr, r_srch, mode64));
+
+            /* load as Ity_F64 */
+            addInstr(env, MIPSInstr_FpLdSt(True /*load */, 8, r_dst, am_addr));
+
+            /* Reset SP */
+            add_to_sp(env, 8);
+            set_MIPS_rounding_default_MSA(env);
+            return r_dst;
+          }
+
+
          default:
             break;
       }
@@ -3294,31 +5981,45 @@ static HReg iselFltExpr_wrk(ISelEnv * env, IRExpr * e)
          case Iop_MAddF64:
          case Iop_MSubF32:
          case Iop_MSubF64: {
-            MIPSFpOp op = 0;
+            Int op = 0;
+            MSADFFlx type = 0;
             switch (e->Iex.Qop.details->op) {
                case Iop_MAddF32:
-                  op = Mfp_MADDS;
+                  op = has_msa ? MSA_FMADD : Mfp_MADDS;
+                  type = MSA_F_WH;
                   break;
                case Iop_MAddF64:
-                  op = Mfp_MADDD;
+                  op = has_msa ? MSA_FMADD : Mfp_MADDD;
+                  type = MSA_F_DW;
                   break;
                case Iop_MSubF32:
-                  op = Mfp_MSUBS;
+                  op = has_msa ? MSA_FMSUB : Mfp_MSUBS;
+                  type = MSA_F_WH;
                   break;
                case Iop_MSubF64:
-                  op = Mfp_MSUBD;
+                  op = has_msa ? MSA_FMSUB : Mfp_MSUBD;
+                  type = MSA_F_DW;
                   break;
                default:
                   vassert(0);
             }
+
             HReg dst = newVRegF(env);
             HReg src1 = iselFltExpr(env, e->Iex.Qop.details->arg2);
             HReg src2 = iselFltExpr(env, e->Iex.Qop.details->arg3);
             HReg src3 = iselFltExpr(env, e->Iex.Qop.details->arg4);
-            set_MIPS_rounding_mode(env, e->Iex.Qop.details->arg1);
-            addInstr(env, MIPSInstr_FpTernary(op, dst,
-                                              src1, src2, src3));
-            set_MIPS_rounding_default(env);
+
+            if (has_msa) {
+               addInstr(env, MIPSInstr_MsaElm(MSA_MOVE, src3, dst, 0));
+               set_MIPS_rounding_mode_MSA(env, e->Iex.Qop.details->arg1);
+               addInstr(env, MIPSInstr_Msa3RF(op, type, dst, src1, src2));
+               set_MIPS_rounding_default_MSA(env);
+            } else {
+               set_MIPS_rounding_mode(env, e->Iex.Qop.details->arg1);
+               addInstr(env, MIPSInstr_FpTernary(op, dst,
+                                                 src1, src2, src3));
+               set_MIPS_rounding_default(env);
+            }
             return dst;
          }
 
@@ -3503,6 +6204,60 @@ static HReg iselDblExpr_wrk(ISelEnv * env, IRExpr * e)
             return dst;
          }
 
+         case Iop_I64StoF64: {
+            HReg r_dst = newVRegD(env);
+            MIPSAMode *am_addr;
+            HReg tmp, fr_src;
+            if (mode64) {
+               tmp = newVRegD(env);
+               fr_src = iselDblExpr(env, e->Iex.Binop.arg2);
+               /* Move SP down 8 bytes */
+               sub_from_sp(env, 8);
+               am_addr = MIPSAMode_IR(0, StackPointer(mode64));
+
+               /* store as I64 */
+               addInstr(env, MIPSInstr_Store(8, am_addr, fr_src, mode64));
+
+               /* load as Ity_F64 */
+               addInstr(env, MIPSInstr_FpLdSt(True /*load */, 8, tmp, am_addr));
+
+               /* Reset SP */
+               add_to_sp(env, 8);
+            } else {
+               HReg Hi, Lo;
+               tmp = newVRegD(env);
+               iselInt64Expr(&Hi, &Lo, env, e->Iex.Binop.arg2);
+               tmp = mk_LoadRR32toFPR(env, Hi, Lo);  /* 2*I32 -> F64 */
+            }
+
+            set_MIPS_rounding_mode(env, e->Iex.Binop.arg1);
+            addInstr(env, MIPSInstr_FpConvert(Mfp_CVTDL, r_dst, tmp));
+            set_MIPS_rounding_default(env);
+
+            return r_dst;
+         }
+
+         case Iop_I64UtoF64: {
+            HReg r_dst;
+            HReg tmp = newVRegV(env);
+            HReg r_src2h, r_src2l;
+            vassert(has_msa);
+            iselInt64Expr(&r_src2h, &r_src2l, env, e->Iex.Binop.arg2);
+            set_MIPS_rounding_mode_MSA(env, e->Iex.Binop.arg1);
+            addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, r_src2l, tmp));
+            addInstr(env, MIPSInstr_MsaElm(MSA_INSERT, r_src2h, tmp, MSA_DFN_W | 1));
+            addInstr(env, MIPSInstr_MsaElm(MSA_INSERT, r_src2l, tmp, MSA_DFN_W | 2));
+            addInstr(env, MIPSInstr_MsaElm(MSA_INSERT, r_src2h, tmp, MSA_DFN_W | 3));
+            HReg r_srchh = newVRegI(env);
+            HReg r_srchl = newVRegI(env);
+            addInstr(env, MIPSInstr_Msa2RF(MSA_FFINT_U, MSA_F_DW, tmp, tmp));
+            addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, tmp, r_srchl, MSA_DFN_W | 0));
+            addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, tmp, r_srchh, MSA_DFN_W | 1));
+            r_dst = mk_LoadRR32toFPR(env, r_srchh, r_srchl);
+            set_MIPS_rounding_default_MSA(env);
+            return r_dst;
+          }
+
          default:
             break;
 
@@ -3544,30 +6299,36 @@ static HReg iselDblExpr_wrk(ISelEnv * env, IRExpr * e)
             set_MIPS_rounding_default(env);
             return dst;
          }
+
+         case Iop_ScaleF64: {
+            HReg src1   = iselDblExpr(env, e->Iex.Triop.details->arg2);
+            HReg src2   = iselDblExpr(env, e->Iex.Triop.details->arg3);
+            HReg v_help = newVRegV(env);
+            HReg dst    = newVRegD(env);
+            vassert(has_msa);
+            set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1);
+            addInstr(env, MIPSInstr_Msa2RF(MSA_FTINT_S, MSA_F_DW, v_help, src2));
+            addInstr(env, MIPSInstr_Msa3RF(MSA_FEXP2, MSA_F_DW, dst, src1, v_help));
+            set_MIPS_rounding_default_MSA(env);
+            return dst;
+         }
          default:
             break;
       }
    }
 
    if (e->tag == Iex_Qop) {
+      vassert(has_msa);
       switch (e->Iex.Qop.details->op) {
-         case Iop_MAddF32:
          case Iop_MAddF64:
-         case Iop_MSubF32:
          case Iop_MSubF64: {
-            MIPSFpOp op = 0;
+            MSA3RFOp op = 0;
             switch (e->Iex.Qop.details->op) {
-               case Iop_MAddF32:
-                  op = Mfp_MADDS;
-                  break;
                case Iop_MAddF64:
-                  op = Mfp_MADDD;
-                  break;
-               case Iop_MSubF32:
-                  op = Mfp_MSUBS;
+                  op = MSA_FMADD;
                   break;
                case Iop_MSubF64:
-                  op = Mfp_MSUBD;
+                  op = MSA_FMSUB;
                   break;
                default:
                   vassert(0);
@@ -3576,10 +6337,10 @@ static HReg iselDblExpr_wrk(ISelEnv * env, IRExpr * e)
             HReg src1 = iselDblExpr(env, e->Iex.Qop.details->arg2);
             HReg src2 = iselDblExpr(env, e->Iex.Qop.details->arg3);
             HReg src3 = iselDblExpr(env, e->Iex.Qop.details->arg4);
-            set_MIPS_rounding_mode(env, e->Iex.Qop.details->arg1);
-            addInstr(env, MIPSInstr_FpTernary(op, dst,
-                                              src1, src2, src3));
-            set_MIPS_rounding_default(env);
+            addInstr(env, MIPSInstr_MsaElm(MSA_MOVE, src3, dst, 0));
+            set_MIPS_rounding_mode_MSA(env, e->Iex.Qop.details->arg1);
+            addInstr(env, MIPSInstr_Msa3RF(op, MSA_F_DW, dst, src1, src2));
+            set_MIPS_rounding_default_MSA(env);
             return dst;
          }
 
@@ -3628,6 +6389,14 @@ static void iselStmt(ISelEnv * env, IRStmt * stmt)
          MIPSAMode *am_addr;
          IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
 
+         if (tyd == Ity_V128) {
+            vassert(has_msa);
+            HReg res = iselV128Expr(env, stmt->Ist.Store.data);
+            HReg addr = iselWordExpr_R(env, stmt->Ist.Store.addr);
+            addInstr(env, MIPSInstr_MsaMi10(MSA_ST, 0, addr, res, MSA_B));
+            return;
+         }
+
          /*constructs addressing mode from address provided */
          am_addr = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd);
 
@@ -3718,6 +6487,21 @@ static void iselStmt(ISelEnv * env, IRStmt * stmt)
                                            am_addr));
             return;
          }
+         if (ty == Ity_V128) {
+            vassert(has_msa);
+            HReg v_src = iselV128Expr(env, stmt->Ist.Put.data);
+#if defined(_MIPSEB)
+            HReg r_addr = newVRegI(env);
+            addInstr(env, MIPSInstr_Alu(mode64 ? Malu_DADD : Malu_ADD, r_addr, GuestStatePointer(mode64),
+                                        MIPSRH_Imm(False, stmt->Ist.Put.offset)));
+            addInstr(env, MIPSInstr_MsaMi10(MSA_ST, 0, r_addr, v_src, MSA_B));
+#else
+            vassert(!(stmt->Ist.Put.offset & 7));
+            addInstr(env, MIPSInstr_MsaMi10(MSA_ST, stmt->Ist.Put.offset >> 3,
+                                            GuestStatePointer(mode64), v_src, MSA_D));
+#endif
+            return;
+         }
          break;
       }
 
@@ -3778,6 +6562,14 @@ static void iselStmt(ISelEnv * env, IRStmt * stmt)
                return;
             }
          }
+
+         if (ty == Ity_V128) {
+            vassert(has_msa);
+            HReg v_dst = lookupIRTemp(env, tmp);
+            HReg v_src = iselV128Expr(env, stmt->Ist.WrTmp.data);
+            addInstr(env, MIPSInstr_MsaElm(MSA_MOVE, v_src, v_dst, 0));
+            return;
+          }
          break;
       }
 
@@ -3854,16 +6646,12 @@ static void iselStmt(ISelEnv * env, IRStmt * stmt)
                }
             }
             case Ity_V128: {
-               /* ATC. The code that this produces really
-                  needs to be looked at, to verify correctness.
-                  I don't think this can ever happen though, since the
-                  MIPS front end never produces 128-bit loads/stores. */
-               vassert(0);
+               vassert(has_msa);
                vassert(rloc.pri == RLPri_V128SpRel);
+               vassert((rloc.spOff < 512) && (rloc.spOff > -512));
                vassert(addToSp >= 16);
                HReg       dst = lookupIRTemp(env, d->tmp);
-               MIPSAMode* am  = MIPSAMode_IR(rloc.spOff, StackPointer(mode64));
-               addInstr(env, MIPSInstr_Load(mode64 ? 8 : 4, dst, am, mode64));
+               addInstr(env, MIPSInstr_MsaMi10(MSA_LD, rloc.spOff, StackPointer(mode64), dst, MSA_B));
                add_to_sp(env, addToSp);
                return;
 
@@ -4166,6 +6954,7 @@ HInstrArray *iselSB_MIPS ( const IRSB* bb,
 
    mode64 = arch_host != VexArchMIPS32;
    fp_mode64 = VEX_MIPS_HOST_FP_MODE(hwcaps_host);
+   has_msa = VEX_MIPS_PROC_MSA(archinfo_host->hwcaps);
 
    /* Make up an initial environment to use. */
    env = LibVEX_Alloc_inline(sizeof(ISelEnv));
@@ -4233,6 +7022,9 @@ HInstrArray *iselSB_MIPS ( const IRSB* bb,
          case Ity_F64:
             hreg = mkHReg(True, HRcFlt64, 0, j++);
             break;
+         case Ity_V128:
+            hreg = mkHReg(True, HRcVec128, 0, j++);
+            break;
          default:
             ppIRType(bb->tyenv->types[i]);
             vpanic("iselBB(mips): IRTemp type");
index 107a6a67b1bbd16adff999845acd5e238af32d7b..4ba1ab2523fec668e61480a2285903130662facc 100644 (file)
@@ -1744,6 +1744,10 @@ static const HChar* show_hwcaps_mips32 ( UInt hwcaps )
 {
    /* MIPS baseline. */
    if (VEX_MIPS_COMP_ID(hwcaps) == VEX_PRID_COMP_MIPS) {
+      /* MIPS baseline with msa. */
+      if (VEX_MIPS_PROC_MSA(hwcaps)) {
+         return "MIPS-baseline-msa";
+      }
       /* MIPS baseline with dspr2. */
       if (VEX_MIPS_PROC_DSP2(hwcaps)) {
          return "MIPS-baseline-dspr2";
@@ -1804,7 +1808,11 @@ static const HChar* show_hwcaps_mips64 ( UInt hwcaps )
 
    /* MIPS64 baseline. */
    if (VEX_MIPS_COMP_ID(hwcaps) == VEX_PRID_COMP_MIPS) {
-      return "mips64-baseline";
+      /* MIPS baseline with msa. */
+      if (VEX_MIPS_PROC_MSA(hwcaps)) {
+         return "MIPS64-baseline-msa";
+      }
+      return "MIPS64-baseline";
    }
 
    return "Unsupported baseline";
index 8ae3e3648b7d44f1c34c28b10120483cd06ed63c..cd616299ded02ed7506a48defe2f69ace11d284f 100644 (file)
@@ -220,6 +220,7 @@ typedef
  */
 #define VEX_PRID_IMP_34K                0x9500
 #define VEX_PRID_IMP_74K                0x9700
+#define VEX_PRID_IMP_P5600              0xa800
 
 /*
  * Instead of Company Options values, bits 31:24 will be packed with
@@ -257,6 +258,11 @@ typedef
                                ((VEX_MIPS_COMP_ID(x) == VEX_PRID_COMP_MIPS) && \
                                (VEX_MIPS_PROC_ID(x) == VEX_PRID_IMP_34K)))
 
+/* Check for MIPS MSA (SIMD): a MIPS P5600 with VEX_MIPS_HOST_FP_MODE set. */
+#define VEX_MIPS_PROC_MSA(x) ((VEX_MIPS_COMP_ID(x) == VEX_PRID_COMP_MIPS) && \
+                              (VEX_MIPS_PROC_ID(x) == VEX_PRID_IMP_P5600) && \
+                              (VEX_MIPS_HOST_FP_MODE(x)))
+
 /* These return statically allocated strings. */
 
 extern const HChar* LibVEX_ppVexArch    ( VexArch );
index 0ac8d30e9e3d04ca847b3a77ed2d76cf959ffd3a..c9291e9ede1858b07f7819040ffb9de2273a6e77 100644 (file)
@@ -154,7 +154,45 @@ typedef
       /*  492 */ UInt guest_LLaddr;
       /*  496 */ UInt guest_LLdata;
 
-      /*  500 */ UInt _padding2[3];
+      /*  500 */ UInt _padding2;
+
+      /* MIPS32 MSA 128-bit vector registers */
+      /*  504 */ V128 guest_w0;
+      /*  520 */ V128 guest_w1;
+      /*  536 */ V128 guest_w2;
+      /*  552 */ V128 guest_w3;
+      /*  568 */ V128 guest_w4;
+      /*  584 */ V128 guest_w5;
+      /*  600 */ V128 guest_w6;
+      /*  616 */ V128 guest_w7;
+      /*  632 */ V128 guest_w8;
+      /*  648 */ V128 guest_w9;
+      /*  664 */ V128 guest_w10;
+      /*  680 */ V128 guest_w11;
+      /*  696 */ V128 guest_w12;
+      /*  712 */ V128 guest_w13;
+      /*  728 */ V128 guest_w14;
+      /*  744 */ V128 guest_w15;
+      /*  760 */ V128 guest_w16;
+      /*  776 */ V128 guest_w17;
+      /*  792 */ V128 guest_w18;
+      /*  808 */ V128 guest_w19;
+      /*  824 */ V128 guest_w20;
+      /*  840 */ V128 guest_w21;
+      /*  856 */ V128 guest_w22;
+      /*  872 */ V128 guest_w23;
+      /*  888 */ V128 guest_w24;
+      /*  904 */ V128 guest_w25;
+      /*  920 */ V128 guest_w26;
+      /*  936 */ V128 guest_w27;
+      /*  952 */ V128 guest_w28;
+      /*  968 */ V128 guest_w29;
+      /*  984 */ V128 guest_w30;
+      /*  1000 */ V128 guest_w31;
+
+      /*  1016 */ UInt guest_MSACSR;
+
+      /*  1020 */ UInt _padding3;
 } VexGuestMIPS32State;
 /*---------------------------------------------------------------*/
 /*--- Utility functions for MIPS32 guest stuff.               ---*/
index 792803ec6d03d3883382c981b543b5bd541384f4..6a37f418480c0265537a203665d6f0dbb603f237 100644 (file)
@@ -151,7 +151,43 @@ typedef
       /*  616 */ ULong guest_LLaddr;
       /*  624 */ ULong guest_LLdata;
 
-      /*  632 */ ULong _padding2;
+      /* MIPS64 MSA 128-bit vector registers */
+      /*  632 */ V128 guest_w0;
+      /*  648 */ V128 guest_w1;
+      /*  664 */ V128 guest_w2;
+      /*  680 */ V128 guest_w3;
+      /*  696 */ V128 guest_w4;
+      /*  712 */ V128 guest_w5;
+      /*  728 */ V128 guest_w6;
+      /*  744 */ V128 guest_w7;
+      /*  760 */ V128 guest_w8;
+      /*  776 */ V128 guest_w9;
+      /*  792 */ V128 guest_w10;
+      /*  808 */ V128 guest_w11;
+      /*  824 */ V128 guest_w12;
+      /*  840 */ V128 guest_w13;
+      /*  856 */ V128 guest_w14;
+      /*  872 */ V128 guest_w15;
+      /*  888 */ V128 guest_w16;
+      /*  904 */ V128 guest_w17;
+      /*  920 */ V128 guest_w18;
+      /*  936 */ V128 guest_w19;
+      /*  952 */ V128 guest_w20;
+      /*  968 */ V128 guest_w21;
+      /*  984 */ V128 guest_w22;
+      /* 1000 */ V128 guest_w23;
+      /* 1016 */ V128 guest_w24;
+      /* 1032 */ V128 guest_w25;
+      /* 1048 */ V128 guest_w26;
+      /* 1064 */ V128 guest_w27;
+      /* 1080 */ V128 guest_w28;
+      /* 1096 */ V128 guest_w29;
+      /* 1112 */ V128 guest_w30;
+      /* 1128 */ V128 guest_w31;
+      /* 1144 */ UInt guest_MSACSR;
+
+      /* 1148 */ UInt _padding2;
+
 } VexGuestMIPS64State;
 
 /*---------------------------------------------------------------*/