From: Petar Jovanovic Date: Tue, 24 Oct 2017 16:00:28 +0000 (+0200) Subject: mips: MSA support for mips32/mips64. X-Git-Tag: VALGRIND_3_14_0~220 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4ef3d807e183377d7dcc347acff43a3f251fbbff;p=thirdparty%2Fvalgrind.git mips: MSA support for mips32/mips64. Full support of MIPS SIMD Architecture Module (MSA) instruction set. Following IOPs have been implemented using generation of MSA instructions: Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4, Iop_GetElem64x2, Iop_V128to32, Iop_V128HIto64, Iop_V128to64, Iop_F32toF16x4, Iop_Abs64x2, Iop_Abs32x4, Iop_Abs16x8, Iop_Abs8x16, Iop_Cnt8x16, Iop_NotV128, Iop_Reverse8sIn16_x8, Iop_Reverse8sIn32_x4, Iop_Reverse8sIn64_x2, Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4, Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4, Iop_Clz64x2, Iop_Abs32Fx4, Iop_Abs64Fx2, Iop_RecipEst32Fx4, Iop_RecipEst64Fx2, Iop_RSqrtEst32Fx4, Iop_RSqrtEst64Fx2, Iop_F16toF32x4, Iop_I32UtoFx4, Iop_FtoI32Sx4_RZ, Iop_FtoI32Ux4_RZ, Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2, Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2, Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2, Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2, Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2, Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2, Iop_QDMulHi32Sx4, Iop_QDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_QRDMulHi16Sx8, Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2, Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2, Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2, Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2, Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4, Iop_Shl64x2, Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2, Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2, Iop_InterleaveHI8x16, Iop_InterleaveHI16x8, Iop_InterleaveHI32x4, Iop_InterleaveHI64x2, Iop_InterleaveLO8x16, Iop_InterleaveLO16x8, Iop_InterleaveLO32x4, Iop_InterleaveLO64x2, Iop_InterleaveEvenLanes8x16, Iop_InterleaveEvenLanes16x8, Iop_InterleaveEvenLanes32x4, Iop_InterleaveOddLanes8x16, Iop_InterleaveOddLanes16x8, Iop_InterleaveOddLanes32x4, Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2, Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2, Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2, Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4, Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4, Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_AndV128, Iop_OrV128, Iop_XorV128, Iop_ShrV128, Iop_ShlV128, Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2, Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2, Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2, Iop_QandQSarNnarrow64Sto32Sx2, Iop_QandQSarNnarrow32Sto16Sx4, Iop_QandQRSarNnarrow64Sto32Sx2, Iop_QandQRSarNnarrow32Sto16Sx4, Iop_CmpEQ32Fx4, Iop_CmpEQ64Fx2, Iop_CmpLT32Fx4, Iop_CmpLT64Fx2, Iop_CmpLE32Fx4, Iop_CmpLE64Fx2, Iop_CmpUN32Fx4, Iop_CmpUN64Fx2, Iop_64HLtoV128, Iop_Min32Fx4, Iop_Min64Fx2, Iop_Max32Fx4, Iop_Max64Fx2, Iop_Sqrt32Fx4, Iop_Sqrt64Fx2, Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2, Iop_Mul32Fx4, Iop_Mul64Fx2, Iop_Div32Fx4, Iop_Div64Fx2, Iop_F32x4_2toQ16x8, Iop_F64x2_2toQ32x4, Iop_ScaleF64, Scale2_64Fx2, Scale2_32Fx4, Iop_Log2_32Fx4, Iop_Log2_64Fx2, Iop_PackOddLanes8x16, Iop_PackEvenLanes8x16, Iop_PackOddLanes16x8, Iop_PackEvenLanes16x8, Iop_PackOddLanes32x4, Iop_PackEvenLanes32x4. 
The following IOPs have been implemented without generating MSA
instructions: Iop_CmpEQ8, Iop_MullU8, Iop_MullS8, Iop_MullU16,
Iop_MullS16, Iop_DivS32, Iop_DivU32, Iop_DivS64, Iop_DivU64,
Iop_F32toI32U, Iop_F64toI64U, Iop_I64UtoF64.

Implementation of the following IOPs has been changed to use MSA where
possible: Iop_MAddF64, Iop_MSubF32, Iop_MSubF64.

Contributed by: Tamara Vlahovic, Aleksandar Rikalo and Aleksandra Karadzic.

Related BZ issue - #382563.
---
diff --git a/VEX/priv/guest_mips_defs.h b/VEX/priv/guest_mips_defs.h
index 5ea213d222..6ee6728d4c 100644
--- a/VEX/priv/guest_mips_defs.h
+++ b/VEX/priv/guest_mips_defs.h
@@ -94,6 +94,20 @@ typedef enum {
    SUBS, SUBD, DIVS
 } flt_op;
 
+typedef enum {
+   FADDW=0, FADDD, FSUBW, FSUBD, FMULW, FMULD, FDIVW, FDIVD, FMADDW, FMADDD,
+   FCAFD, FCAFW, FSAFD, FSAFW, FCEQD, FCEQW, FSEQD, FSEQW, FCLTD, FCLTW, FSLTD,
+   FSLTW, FCLED, FCLEW, FSLED, FSLEW, FCNED, FCNEW, FSNED, FSNEW, FCUND, FCUNW,
+   FSUND, FSUNW, FCORD, FCORW, FSORD, FSORW, FCUEQD, FCUEQW, FSUEQD, FSUEQW,
+   FCUNED, FCUNEW, FSUNED, FSUNEW, FCULED, FCULEW, FSULED, FSULEW, FCULTD,
+   FCULTW, FSULTD, FSULTW, FEXP2W, FEXP2D, FMINW, FMIND, FMINAW, FMINAD, FMAXW,
+   FMAXD, FMAXAW, FMAXAD, FFINTSW, FFINTSD, FRCPW, FRCPD, FRSQRTW, FRSQRTD,
+   FSQRTW, FSQRTD, FRINTW, FRINTD, FTRUNCUW, FTRUNCUD, FTRUNCSW, FTRUNCSD,
+   FEXDOH, FEXDOW, FEXUPRD, FEXUPRW, FEXUPLD, FEXUPLW, FLOG2W, FLOG2D,
+   FTQH, FTQW, FFQRW, FFQRD, FFQLW, FFQLD, FTINT_SW, FTINT_SD,
+   FTINT_UW, FTINT_UD, FFINT_UW, FFINT_UD,
+} msa_flt_op;
+
 #if defined (_MIPSEL)
 #define MIPS_IEND Iend_LE
 #else
@@ -109,6 +123,11 @@ extern UInt mips_dirtyhelper_calculate_FCSR_fp32 ( void* guest_state, UInt fs,
 extern UInt mips_dirtyhelper_calculate_FCSR_fp64 ( void* guest_state, UInt fs,
                                                    UInt ft, flt_op op );
 
+extern UInt mips_dirtyhelper_calculate_MSACSR ( void* gs, UInt ws, UInt wt,
+                                                msa_flt_op inst );
+extern UInt mips_dirtyhelper_get_MSAIR ( void );
+
+
 /*---------------------------------------------------------*/
 /*--- Condition code stuff                               ---*/
 /*---------------------------------------------------------*/
diff --git a/VEX/priv/guest_mips_helpers.c b/VEX/priv/guest_mips_helpers.c
index 00a92c3ac7..3f21512a82 100644
--- a/VEX/priv/guest_mips_helpers.c
+++ b/VEX/priv/guest_mips_helpers.c
@@ -40,6 +40,12 @@
 #include "guest_generic_bb_to_IR.h"
 #include "guest_mips_defs.h"
 
+#if defined (__GNUC__)
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#else
+#define GCC_VERSION 0
+#endif
+
 /* This file contains helper functions for mips guest code. Calls to
    these functions are generated by the back end.
*/ @@ -176,6 +182,13 @@ void LibVEX_GuestMIPS32_initialise( /*OUT*/ VexGuestMIPS32State * vex_state) vex_state->guest_ac1 = 0; /* Accumulator 1 */ vex_state->guest_ac2 = 0; /* Accumulator 2 */ vex_state->guest_ac3 = 0; /* Accumulator 3 */ + + vex_state->guest_w0.w64[0] = 0; + vex_state->guest_w0.w64[1] = 0; + vex_state->guest_w1.w64[0] = 0; + vex_state->guest_w1.w64[1] = 0; + vex_state->guest_w2.w64[0] = 0; + vex_state->guest_w2.w64[1] = 0; } void LibVEX_GuestMIPS64_initialise ( /*OUT*/ VexGuestMIPS64State * vex_state ) @@ -282,6 +295,8 @@ void LibVEX_GuestMIPS64_initialise ( /*OUT*/ VexGuestMIPS64State * vex_state ) vex_state->guest_LLaddr = 0xFFFFFFFFFFFFFFFFULL; vex_state->guest_LLdata = 0; + + vex_state->guest_MSACSR = 0; } /*-----------------------------------------------------------*/ @@ -511,6 +526,25 @@ HWord mips_dirtyhelper_rdhwr ( UInt rd ) : "t0", "$f24" \ ); +#define ASM_VOLATILE_MSA_UNARY(inst) \ + __asm__ volatile(".set push" "\n\t" \ + ".set mips32r2" "\n\t" \ + ".set hardfloat" "\n\t" \ + ".set fp=64" "\n\t" \ + ".set msa" "\n\t" \ + ".set noreorder" "\n\t" \ + "cfcmsa $t0, $1" "\n\t" \ + "ctcmsa $1, %2" "\n\t" \ + "ld.b $w24, 0(%1)" "\n\t" \ + #inst" $w24, $w24" "\n\t" \ + "cfcmsa %0, $1" "\n\t" \ + "ctcmsa $1, $t0" "\n\t" \ + ".set pop" "\n\t" \ + : "=r" (ret) \ + : "r" (&(addr[ws])), "r" (msacsr) \ + : "t0" \ + ); + #define ASM_VOLATILE_BINARY32(inst) \ __asm__ volatile(".set push" "\n\t" \ ".set hardfloat" "\n\t" \ @@ -559,6 +593,25 @@ HWord mips_dirtyhelper_rdhwr ( UInt rd ) : "t0", "$f24", "$f26" \ ); +#define ASM_VOLATILE_MSA_BINARY(inst) \ + __asm__ volatile(".set push" "\n\t" \ + ".set mips32r2" "\n\t" \ + ".set hardfloat" "\n\t" \ + ".set fp=64" "\n\t" \ + ".set msa" "\n\t" \ + "cfcmsa $t0, $1" "\n\t" \ + "ctcmsa $1, %3" "\n\t" \ + "ld.b $w24, 0(%1)" "\n\t" \ + "ld.b $w26, 0(%2)" "\n\t" \ + #inst" $w24, $w24, $w26" "\n\t" \ + "cfcmsa %0, $1" "\n\t" \ + "ctcmsa $1, $t0" "\n\t" \ + ".set pop" "\n\t" \ + : "=r" (ret) \ + : "r" (&(addr[ws])), "r" (&(addr[wt])), "r" (msacsr)\ + : "t0" \ + ); + /* TODO: Add cases for all fpu instructions because all fpu instructions are change the value of FCSR register. */ extern UInt mips_dirtyhelper_calculate_FCSR_fp32 ( void* gs, UInt fs, UInt ft, @@ -767,6 +820,436 @@ extern UInt mips_dirtyhelper_calculate_FCSR_fp64 ( void* gs, UInt fs, UInt ft, return ret; } + +extern UInt mips_dirtyhelper_calculate_MSACSR ( void* gs, UInt ws, UInt wt, + msa_flt_op inst ) { + UInt ret = 0; +/* GCC 4.8 and later support MIPS MSA. 
*/ +#if defined(__mips__) && (defined(__clang__) || (GCC_VERSION >= 408)) +#if defined(VGA_mips32) + VexGuestMIPS32State* guest_state = (VexGuestMIPS32State*)gs; +#else + VexGuestMIPS64State* guest_state = (VexGuestMIPS64State*)gs; +#endif + V128 *addr = (V128 *)&guest_state->guest_w0; + UInt msacsr = guest_state->guest_MSACSR; + + switch (inst) { + case FADDW: + ASM_VOLATILE_MSA_BINARY(fadd.w) + break; + + case FADDD: + ASM_VOLATILE_MSA_BINARY(fadd.d) + break; + + case FSUBW: + ASM_VOLATILE_MSA_BINARY(fsub.w); + break; + + case FSUBD: + ASM_VOLATILE_MSA_BINARY(fsub.d); + break; + + case FMULW: + ASM_VOLATILE_MSA_BINARY(fmul.w); + break; + + case FMULD: + ASM_VOLATILE_MSA_BINARY(fmul.d); + break; + + case FDIVW: + ASM_VOLATILE_MSA_BINARY(fdiv.w); + break; + + case FDIVD: + ASM_VOLATILE_MSA_BINARY(fdiv.d); + break; + + case FMADDW: + ASM_VOLATILE_MSA_BINARY(fmadd.w); + break; + + case FMADDD: + ASM_VOLATILE_MSA_BINARY(fmadd.d); + break; + + case FCAFW: + ASM_VOLATILE_MSA_BINARY(fcaf.w); + break; + + case FCAFD: + ASM_VOLATILE_MSA_BINARY(fcaf.d); + break; + + case FSAFW: + ASM_VOLATILE_MSA_BINARY(fsaf.w); + break; + + case FSAFD: + ASM_VOLATILE_MSA_BINARY(fsaf.d); + break; + + case FCEQW: + ASM_VOLATILE_MSA_BINARY(fceq.w); + break; + + case FCEQD: + ASM_VOLATILE_MSA_BINARY(fceq.d); + break; + + case FSEQW: + ASM_VOLATILE_MSA_BINARY(fseq.w); + break; + + case FSEQD: + ASM_VOLATILE_MSA_BINARY(fseq.d); + break; + + case FCLTW: + ASM_VOLATILE_MSA_BINARY(fclt.w); + break; + + case FCLTD: + ASM_VOLATILE_MSA_BINARY(fclt.d); + break; + + case FSLTW: + ASM_VOLATILE_MSA_BINARY(fslt.w); + break; + + case FSLTD: + ASM_VOLATILE_MSA_BINARY(fslt.d); + break; + + case FCLEW: + ASM_VOLATILE_MSA_BINARY(fcle.w); + break; + + case FCLED: + ASM_VOLATILE_MSA_BINARY(fcle.d); + break; + + case FSLEW: + ASM_VOLATILE_MSA_BINARY(fsle.w); + break; + + case FSLED: + ASM_VOLATILE_MSA_BINARY(fsle.d); + break; + + case FCNEW: + ASM_VOLATILE_MSA_BINARY(fcne.w); + break; + + case FCNED: + ASM_VOLATILE_MSA_BINARY(fcne.d); + break; + + case FSNEW: + ASM_VOLATILE_MSA_BINARY(fsne.w); + break; + + case FSNED: + ASM_VOLATILE_MSA_BINARY(fsne.d); + break; + + case FEXP2W: + ASM_VOLATILE_MSA_BINARY(fexp2.w); + break; + + case FEXP2D: + ASM_VOLATILE_MSA_BINARY(fexp2.d); + break; + + case FMINW: + ASM_VOLATILE_MSA_BINARY(fmin.w); + break; + + case FMIND: + ASM_VOLATILE_MSA_BINARY(fmin.d); + break; + + case FMINAW: + ASM_VOLATILE_MSA_BINARY(fmin_a.w); + break; + + case FMINAD: + ASM_VOLATILE_MSA_BINARY(fmin_a.d); + break; + + case FCUNW: + ASM_VOLATILE_MSA_BINARY(fcun.w); + break; + + case FCUND: + ASM_VOLATILE_MSA_BINARY(fcun.d); + break; + + case FSUNW: + ASM_VOLATILE_MSA_BINARY(fsun.w); + break; + + case FSUND: + ASM_VOLATILE_MSA_BINARY(fsun.d); + break; + + case FCORW: + ASM_VOLATILE_MSA_BINARY(fcor.w); + break; + + case FCORD: + ASM_VOLATILE_MSA_BINARY(fcor.d); + break; + + case FSORW: + ASM_VOLATILE_MSA_BINARY(fsor.w); + break; + + case FSORD: + ASM_VOLATILE_MSA_BINARY(fsor.d); + break; + + case FCUEQW: + ASM_VOLATILE_MSA_BINARY(fcueq.w); + break; + + case FCUEQD: + ASM_VOLATILE_MSA_BINARY(fcueq.d); + break; + + case FSUEQW: + ASM_VOLATILE_MSA_BINARY(fsueq.w); + break; + + case FSUEQD: + ASM_VOLATILE_MSA_BINARY(fsueq.d); + break; + + case FCUNEW: + ASM_VOLATILE_MSA_BINARY(fcune.w); + break; + + case FCUNED: + ASM_VOLATILE_MSA_BINARY(fcune.d); + break; + + case FSUNEW: + ASM_VOLATILE_MSA_BINARY(fsune.w); + break; + + case FSUNED: + ASM_VOLATILE_MSA_BINARY(fsune.d); + break; + + case FCULEW: + ASM_VOLATILE_MSA_BINARY(fcule.w); + 
break;
+
+      case FCULED:
+         ASM_VOLATILE_MSA_BINARY(fcule.d);
+         break;
+
+      case FSULEW:
+         ASM_VOLATILE_MSA_BINARY(fsule.w);
+         break;
+
+      case FSULED:
+         ASM_VOLATILE_MSA_BINARY(fsule.d);
+         break;
+
+      case FCULTW:
+         ASM_VOLATILE_MSA_BINARY(fcult.w);
+         break;
+
+      case FCULTD:
+         ASM_VOLATILE_MSA_BINARY(fcult.d);
+         break;
+
+      case FSULTW:
+         ASM_VOLATILE_MSA_BINARY(fsult.w);
+         break;
+
+      case FSULTD:
+         ASM_VOLATILE_MSA_BINARY(fsult.d);
+         break;
+
+      case FMAXW:
+         ASM_VOLATILE_MSA_BINARY(fmax.w);
+         break;
+
+      case FMAXD:
+         ASM_VOLATILE_MSA_BINARY(fmax.d);
+         break;
+
+      case FMAXAW:
+         ASM_VOLATILE_MSA_BINARY(fmax_a.w);
+         break;
+
+      case FMAXAD:
+         ASM_VOLATILE_MSA_BINARY(fmax_a.d);
+         break;
+
+      case FFINTSW:
+         ASM_VOLATILE_MSA_UNARY(ffint_s.w);
+         break;
+
+      case FFINTSD:
+         ASM_VOLATILE_MSA_UNARY(ffint_s.d);
+         break;
+
+      case FRCPW:
+         ASM_VOLATILE_MSA_UNARY(frcp.w);
+         break;
+
+      case FRCPD:
+         ASM_VOLATILE_MSA_UNARY(frcp.d);
+         break;
+
+      case FRSQRTW:
+         ASM_VOLATILE_MSA_UNARY(frsqrt.w);
+         break;
+
+      case FRSQRTD:
+         ASM_VOLATILE_MSA_UNARY(frsqrt.d);
+         break;
+
+      case FSQRTW:
+         ASM_VOLATILE_MSA_UNARY(fsqrt.w);
+         break;
+
+      case FSQRTD:
+         ASM_VOLATILE_MSA_UNARY(fsqrt.d);
+         break;
+
+      case FRINTW:
+         ASM_VOLATILE_MSA_UNARY(frint.w);
+         break;
+
+      case FRINTD:
+         ASM_VOLATILE_MSA_UNARY(frint.d);
+         break;
+
+      case FTRUNCUW:
+         ASM_VOLATILE_MSA_UNARY(ftrunc_u.w);
+         break;
+
+      case FTRUNCUD:
+         ASM_VOLATILE_MSA_UNARY(ftrunc_u.d);
+         break;
+
+      case FTRUNCSW:
+         ASM_VOLATILE_MSA_UNARY(ftrunc_s.w);
+         break;
+
+      case FTRUNCSD:
+         ASM_VOLATILE_MSA_UNARY(ftrunc_s.d);
+         break;
+
+      case FEXDOH:
+         ASM_VOLATILE_MSA_BINARY(fexdo.h);
+         break;
+
+      case FEXDOW:
+         ASM_VOLATILE_MSA_BINARY(fexdo.w);
+         break;
+
+      case FEXUPRW:
+         ASM_VOLATILE_MSA_UNARY(fexupr.w);
+         break;
+
+      case FEXUPRD:
+         ASM_VOLATILE_MSA_UNARY(fexupr.d);
+         break;
+
+      case FEXUPLW:
+         ASM_VOLATILE_MSA_UNARY(fexupl.w);
+         break;
+
+      case FEXUPLD:
+         ASM_VOLATILE_MSA_UNARY(fexupl.d);
+         break;
+
+      case FTQH:
+         ASM_VOLATILE_MSA_BINARY(ftq.h);
+         break;
+
+      case FTQW:
+         ASM_VOLATILE_MSA_BINARY(ftq.w);
+         break;
+
+      case FFQRD:
+         ASM_VOLATILE_MSA_UNARY(ffqr.d);
+         break;
+
+      case FFQRW:
+         ASM_VOLATILE_MSA_UNARY(ffqr.w);
+         break;
+
+      case FFQLD:
+         ASM_VOLATILE_MSA_UNARY(ffql.d);
+         break;
+
+      case FFQLW:
+         ASM_VOLATILE_MSA_UNARY(ffql.w);
+         break;
+
+      case FTINT_SD:
+         ASM_VOLATILE_MSA_UNARY(ftint_s.d);
+         break;
+
+      case FTINT_SW:
+         ASM_VOLATILE_MSA_UNARY(ftint_s.w);
+         break;
+
+      case FTINT_UD:
+         ASM_VOLATILE_MSA_UNARY(ftint_u.d);
+         break;
+
+      case FTINT_UW:
+         ASM_VOLATILE_MSA_UNARY(ftint_u.w);
+         break;
+
+      case FLOG2D:
+         ASM_VOLATILE_MSA_UNARY(flog2.d);
+         break;
+
+      case FLOG2W:
+         ASM_VOLATILE_MSA_UNARY(flog2.w);
+         break;
+
+      case FFINT_UD:
+         ASM_VOLATILE_MSA_UNARY(ffint_u.d);
+         break;
+
+      case FFINT_UW:
+         ASM_VOLATILE_MSA_UNARY(ffint_u.w);
+         break;
+   }
+
+#endif
+   return ret;
+}
+
+extern UInt mips_dirtyhelper_get_MSAIR() {
+   UInt ret = 0;
+/* GCC 4.8 and later support MIPS MSA.
*/ +#if defined(__mips__) && (defined(__clang__) || (GCC_VERSION >= 408)) + __asm__ volatile(".set push \n\t" + ".set mips32r2 \n\t" + ".set hardfloat \n\t" + ".set fp=64 \n\t" + ".set msa \n\t" + ".set noreorder \n\t" + "cfcmsa %0, $0 \n\t" + ".set pop \n\t" + : "=r" (ret) : : ); +#endif + return ret; +} + + + + /*---------------------------------------------------------------*/ /*--- end guest_mips_helpers.c ---*/ /*---------------------------------------------------------------*/ diff --git a/VEX/priv/guest_mips_toIR.c b/VEX/priv/guest_mips_toIR.c index d5215f1043..0063ae3026 100644 --- a/VEX/priv/guest_mips_toIR.c +++ b/VEX/priv/guest_mips_toIR.c @@ -75,6 +75,9 @@ static Bool mode64 = False; /* CPU has FPU and 32 dbl. prec. FP registers. */ static Bool fp_mode64 = False; +/* CPU has MSA unit */ +static Bool has_msa = False; + /* Define 1.0 in single and double precision. */ #define ONE_SINGLE 0x3F800000 #define ONE_DOUBLE 0x3FF0000000000000ULL @@ -421,6 +424,286 @@ static UInt accumulatorGuestRegOffset(UInt acNo) return ret; } +/* ---------------- MIPS32 MSA registers ---------------- */ + +static UInt msaGuestRegOffset(UInt msaRegNo) { + vassert(msaRegNo <= 31); + UInt ret; + + if (mode64) { + switch (msaRegNo) { + case 0: + ret = offsetof(VexGuestMIPS64State, guest_w0); + break; + + case 1: + ret = offsetof(VexGuestMIPS64State, guest_w1); + break; + + case 2: + ret = offsetof(VexGuestMIPS64State, guest_w2); + break; + + case 3: + ret = offsetof(VexGuestMIPS64State, guest_w3); + break; + + case 4: + ret = offsetof(VexGuestMIPS64State, guest_w4); + break; + + case 5: + ret = offsetof(VexGuestMIPS64State, guest_w5); + break; + + case 6: + ret = offsetof(VexGuestMIPS64State, guest_w6); + break; + + case 7: + ret = offsetof(VexGuestMIPS64State, guest_w7); + break; + + case 8: + ret = offsetof(VexGuestMIPS64State, guest_w8); + break; + + case 9: + ret = offsetof(VexGuestMIPS64State, guest_w9); + break; + + case 10: + ret = offsetof(VexGuestMIPS64State, guest_w10); + break; + + case 11: + ret = offsetof(VexGuestMIPS64State, guest_w11); + break; + + case 12: + ret = offsetof(VexGuestMIPS64State, guest_w12); + break; + + case 13: + ret = offsetof(VexGuestMIPS64State, guest_w13); + break; + + case 14: + ret = offsetof(VexGuestMIPS64State, guest_w14); + break; + + case 15: + ret = offsetof(VexGuestMIPS64State, guest_w15); + break; + + case 16: + ret = offsetof(VexGuestMIPS64State, guest_w16); + break; + + case 17: + ret = offsetof(VexGuestMIPS64State, guest_w17); + break; + + case 18: + ret = offsetof(VexGuestMIPS64State, guest_w18); + break; + + case 19: + ret = offsetof(VexGuestMIPS64State, guest_w19); + break; + + case 20: + ret = offsetof(VexGuestMIPS64State, guest_w20); + break; + + case 21: + ret = offsetof(VexGuestMIPS64State, guest_w21); + break; + + case 22: + ret = offsetof(VexGuestMIPS64State, guest_w22); + break; + + case 23: + ret = offsetof(VexGuestMIPS64State, guest_w23); + break; + + case 24: + ret = offsetof(VexGuestMIPS64State, guest_w24); + break; + + case 25: + ret = offsetof(VexGuestMIPS64State, guest_w25); + break; + + case 26: + ret = offsetof(VexGuestMIPS64State, guest_w26); + break; + + case 27: + ret = offsetof(VexGuestMIPS64State, guest_w27); + break; + + case 28: + ret = offsetof(VexGuestMIPS64State, guest_w28); + break; + + case 29: + ret = offsetof(VexGuestMIPS64State, guest_w29); + break; + + case 30: + ret = offsetof(VexGuestMIPS64State, guest_w30); + break; + + case 31: + ret = offsetof(VexGuestMIPS64State, guest_w31); + break; + + default: + vassert(0); + 
break; + } + } else { + switch (msaRegNo) { + case 0: + ret = offsetof(VexGuestMIPS32State, guest_w0); + break; + + case 1: + ret = offsetof(VexGuestMIPS32State, guest_w1); + break; + + case 2: + ret = offsetof(VexGuestMIPS32State, guest_w2); + break; + + case 3: + ret = offsetof(VexGuestMIPS32State, guest_w3); + break; + + case 4: + ret = offsetof(VexGuestMIPS32State, guest_w4); + break; + + case 5: + ret = offsetof(VexGuestMIPS32State, guest_w5); + break; + + case 6: + ret = offsetof(VexGuestMIPS32State, guest_w6); + break; + + case 7: + ret = offsetof(VexGuestMIPS32State, guest_w7); + break; + + case 8: + ret = offsetof(VexGuestMIPS32State, guest_w8); + break; + + case 9: + ret = offsetof(VexGuestMIPS32State, guest_w9); + break; + + case 10: + ret = offsetof(VexGuestMIPS32State, guest_w10); + break; + + case 11: + ret = offsetof(VexGuestMIPS32State, guest_w11); + break; + + case 12: + ret = offsetof(VexGuestMIPS32State, guest_w12); + break; + + case 13: + ret = offsetof(VexGuestMIPS32State, guest_w13); + break; + + case 14: + ret = offsetof(VexGuestMIPS32State, guest_w14); + break; + + case 15: + ret = offsetof(VexGuestMIPS32State, guest_w15); + break; + + case 16: + ret = offsetof(VexGuestMIPS32State, guest_w16); + break; + + case 17: + ret = offsetof(VexGuestMIPS32State, guest_w17); + break; + + case 18: + ret = offsetof(VexGuestMIPS32State, guest_w18); + break; + + case 19: + ret = offsetof(VexGuestMIPS32State, guest_w19); + break; + + case 20: + ret = offsetof(VexGuestMIPS32State, guest_w20); + break; + + case 21: + ret = offsetof(VexGuestMIPS32State, guest_w21); + break; + + case 22: + ret = offsetof(VexGuestMIPS32State, guest_w22); + break; + + case 23: + ret = offsetof(VexGuestMIPS32State, guest_w23); + break; + + case 24: + ret = offsetof(VexGuestMIPS32State, guest_w24); + break; + + case 25: + ret = offsetof(VexGuestMIPS32State, guest_w25); + break; + + case 26: + ret = offsetof(VexGuestMIPS32State, guest_w26); + break; + + case 27: + ret = offsetof(VexGuestMIPS32State, guest_w27); + break; + + case 28: + ret = offsetof(VexGuestMIPS32State, guest_w28); + break; + + case 29: + ret = offsetof(VexGuestMIPS32State, guest_w29); + break; + + case 30: + ret = offsetof(VexGuestMIPS32State, guest_w30); + break; + + case 31: + ret = offsetof(VexGuestMIPS32State, guest_w31); + break; + + default: + vassert(0); + break; + } + } + + return ret; +} + + /* Do a endian load of a 32-bit word, regardless of the endianness of the underlying host. */ static inline UInt getUInt(const UChar * p) @@ -469,6 +752,15 @@ static inline UInt getUInt(const UChar * p) assign(t1, binop(Iop_Add64, getIReg(rs), \ mkU64(extend_s_16to64(imm)))); \ +#define LOAD_STORE_PATTERN_MSA(imm) \ + t1 = newTemp(mode64 ? Ity_I64 : Ity_I32); \ + if (!mode64) \ + assign(t1, binop(Iop_Add32, getIReg(ws), \ + mkU32(extend_s_10to32(imm)))); \ + else \ + assign(t1, binop(Iop_Add64, getIReg(ws), \ + mkU64(extend_s_10to64(imm)))); \ + #define LOADX_STORE_PATTERN \ t1 = newTemp(mode64 ? 
Ity_I64 : Ity_I32); \ if(!mode64) \ @@ -765,11 +1057,25 @@ static Bool branch_or_jump(const UChar * addr) } if (opcode == 0x11) { - /*bc1f & bc1t */ + /* bc1f & bc1t */ fmt = get_fmt(cins); if (fmt == 0x08) { return True; } + + /* MSA branches */ + /* bnz.df, bz.df */ + if (fmt >= 0x18) { + return True; + } + /* bnz.v */ + if (fmt == 0x0f) { + return True; + } + /* bz.v */ + if (fmt == 0x0b) { + return True; + } } /* bposge32 */ @@ -964,9 +1270,9 @@ static UShort extend_s_10to16(UInt x) return (UShort) ((((Int) x) << 22) >> 22); } -static ULong extend_s_10to32(UInt x) +static UInt extend_s_10to32(UInt x) { - return (ULong)((((Long) x) << 22) >> 22); + return (UInt)((((Int) x) << 22) >> 22); } static ULong extend_s_10to64(UInt x) @@ -1066,6 +1372,12 @@ static IRExpr *getIReg(UInt iregNo) } } + +static IRExpr *getWReg(UInt wregNo) { + vassert(wregNo <= 31); + return IRExpr_Get(msaGuestRegOffset(wregNo), Ity_V128); +} + static IRExpr *getHI(void) { if (mode64) @@ -1106,6 +1418,13 @@ static IRExpr *getLLdata(void) return IRExpr_Get(offsetof(VexGuestMIPS32State, guest_LLdata), Ity_I32); } +static IRExpr *getMSACSR(void) { + if (mode64) + return IRExpr_Get(offsetof(VexGuestMIPS64State, guest_MSACSR), Ity_I32); + else + return IRExpr_Get(offsetof(VexGuestMIPS32State, guest_MSACSR), Ity_I32); +} + /* Get byte from register reg, byte pos from 0 to 3 (or 7 for MIPS64) . */ static IRExpr *getByteFromReg(UInt reg, UInt byte_pos) { @@ -1144,6 +1463,13 @@ static void putLLdata(IRExpr * e) stmt(IRStmt_Put(offsetof(VexGuestMIPS32State, guest_LLdata), e)); } +static void putMSACSR(IRExpr * e) { + if (mode64) + stmt(IRStmt_Put(offsetof(VexGuestMIPS64State, guest_MSACSR), e)); + else + stmt(IRStmt_Put(offsetof(VexGuestMIPS32State, guest_MSACSR), e)); +} + /* fs - fpu source register number. inst - fpu instruction that needs to be executed. sz32 - size of source register. @@ -1231,26 +1557,90 @@ static void calculateFCSR(UInt fs, UInt ft, UInt inst, Bool sz32, UInt opN) putFCSR(mkexpr(fcsr)); } -static IRExpr *getULR(void) -{ - if (mode64) - return IRExpr_Get(offsetof(VexGuestMIPS64State, guest_ULR), Ity_I64); - else - return IRExpr_Get(offsetof(VexGuestMIPS32State, guest_ULR), Ity_I32); -} +/* ws, wt - source MSA register numbers. + inst - MSA fp instruction that needs to be executed. + opN - number of operads: + 1 - unary operation. + 2 - binary operation. */ +static void calculateMSACSR(UInt ws, UInt wt, UInt inst, UInt opN) { + IRDirty *d; + IRTemp msacsr = newTemp(Ity_I32); + /* IRExpr_BBPTR() => Need to pass pointer to guest state to helper. */ + d = unsafeIRDirty_1_N(msacsr, 0, + "mips_dirtyhelper_calculate_MSACSR", + &mips_dirtyhelper_calculate_MSACSR, + mkIRExprVec_4(IRExpr_GSPTR(), + mkU32(ws), + mkU32(wt), + mkU32(inst))); -static void putIReg(UInt archreg, IRExpr * e) -{ - IRType ty = mode64 ? Ity_I64 : Ity_I32; - vassert(archreg < 32); - vassert(typeOfIRExpr(irsb->tyenv, e) == ty); - if (archreg != 0) - stmt(IRStmt_Put(integerGuestRegOffset(archreg), e)); -} + if (opN == 1) { /* Unary operation. */ + /* Declare we're reading guest state. 
*/ + d->nFxState = 2; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; /* read */ -static IRExpr *mkNarrowTo32(IRType ty, IRExpr * src) -{ - vassert(ty == Ity_I32 || ty == Ity_I64); + if (mode64) + d->fxState[0].offset = offsetof(VexGuestMIPS64State, guest_MSACSR); + else + d->fxState[0].offset = offsetof(VexGuestMIPS32State, guest_MSACSR); + + d->fxState[0].size = sizeof(UInt); + d->fxState[1].fx = Ifx_Read; /* read */ + d->fxState[1].offset = msaGuestRegOffset(ws); + d->fxState[1].size = sizeof(ULong); + } else if (opN == 2) { /* Binary operation. */ + /* Declare we're reading guest state. */ + d->nFxState = 3; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; /* read */ + + if (mode64) + d->fxState[0].offset = offsetof(VexGuestMIPS64State, guest_MSACSR); + else + d->fxState[0].offset = offsetof(VexGuestMIPS32State, guest_MSACSR); + + d->fxState[0].size = sizeof(UInt); + d->fxState[1].fx = Ifx_Read; /* read */ + d->fxState[1].offset = msaGuestRegOffset(ws); + d->fxState[1].size = sizeof(ULong); + d->fxState[2].fx = Ifx_Read; /* read */ + d->fxState[2].offset = msaGuestRegOffset(wt); + d->fxState[2].size = sizeof(ULong); + } + + stmt(IRStmt_Dirty(d)); + putMSACSR(mkexpr(msacsr)); +} + +static IRExpr *getULR(void) +{ + if (mode64) + return IRExpr_Get(offsetof(VexGuestMIPS64State, guest_ULR), Ity_I64); + else + return IRExpr_Get(offsetof(VexGuestMIPS32State, guest_ULR), Ity_I32); +} + +static void putIReg(UInt archreg, IRExpr * e) +{ + IRType ty = mode64 ? Ity_I64 : Ity_I32; + vassert(archreg < 32); + vassert(typeOfIRExpr(irsb->tyenv, e) == ty); + if (archreg != 0) + stmt(IRStmt_Put(integerGuestRegOffset(archreg), e)); +} + +static void putWReg(UInt wregNo, IRExpr * e) { + vassert(wregNo <= 31); + vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128); + stmt(IRStmt_Put(msaGuestRegOffset(wregNo), e)); + stmt(IRStmt_Put(floatGuestRegOffset(wregNo), + unop(Iop_ReinterpI64asF64, unop(Iop_V128to64, e)))); +} + +static IRExpr *mkNarrowTo32(IRType ty, IRExpr * src) +{ + vassert(ty == Ity_I32 || ty == Ity_I64); return ty == Ity_I64 ? unop(Iop_64to32, src) : src; } @@ -1311,6 +1701,12 @@ static IRExpr *mkNarrowTo8 ( IRType ty, IRExpr * src ) return ty == Ity_I64 ? unop(Iop_64to8, src) : unop(Iop_32to8, src); } +static IRExpr *mkNarrowTo16 ( IRType ty, IRExpr * src ) +{ + vassert(ty == Ity_I32 || ty == Ity_I64); + return ty == Ity_I64 ? unop(Iop_64to16, src) : unop(Iop_32to16, src); +} + static void putPC(IRExpr * e) { stmt(IRStmt_Put(OFFB_PC, e)); @@ -1492,6 +1888,11 @@ static void putFReg(UInt dregNo, IRExpr * e) IRType ty = fp_mode64 ? 
Ity_F64 : Ity_F32; vassert(typeOfIRExpr(irsb->tyenv, e) == ty); stmt(IRStmt_Put(floatGuestRegOffset(dregNo), e)); + if (has_msa && fp_mode64) { + stmt(IRStmt_Put(msaGuestRegOffset(dregNo), + binop(Iop_64HLtoV128, + mkU64(0), unop(Iop_ReinterpF64asI64, e)))); + } } static void putDReg(UInt dregNo, IRExpr * e) @@ -1501,6 +1902,10 @@ static void putDReg(UInt dregNo, IRExpr * e) IRType ty = Ity_F64; vassert(typeOfIRExpr(irsb->tyenv, e) == ty); stmt(IRStmt_Put(floatGuestRegOffset(dregNo), e)); + if (has_msa) + stmt(IRStmt_Put(msaGuestRegOffset(dregNo), + binop(Iop_64HLtoV128, + mkU64(0), unop(Iop_ReinterpF64asI64, e)))); } else { vassert(dregNo < 32); vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64); @@ -1555,6 +1960,30 @@ static IRExpr* get_IR_roundingmode ( void ) binop(Iop_Shl32, mkexpr(rm_MIPS), mkU8(1)), mkU32(2))); } +static IRExpr* get_IR_roundingmode_MSA ( void ) { + /* + rounding mode | MIPS | IR + ------------------------ + to nearest | 00 | 00 + to zero | 01 | 11 + to +infinity | 10 | 10 + to -infinity | 11 | 01 + */ + IRTemp rm_MIPS = newTemp(Ity_I32); + /* Last two bits in MSACSR are rounding mode. */ + + if (mode64) + assign(rm_MIPS, binop(Iop_And32, IRExpr_Get(offsetof(VexGuestMIPS64State, + guest_MSACSR), Ity_I32), mkU32(3))); + else + assign(rm_MIPS, binop(Iop_And32, IRExpr_Get(offsetof(VexGuestMIPS32State, + guest_MSACSR), Ity_I32), mkU32(3))); + + /* rm_IR = XOR( rm_MIPS32, (rm_MIPS32 << 1) & 2) */ + return binop(Iop_Xor32, mkexpr(rm_MIPS), binop(Iop_And32, + binop(Iop_Shl32, mkexpr(rm_MIPS), mkU8(1)), mkU32(2))); +} + /* sz, ULong -> IRExpr */ static IRExpr *mkSzImm ( IRType ty, ULong imm64 ) { @@ -11972,121 +12401,13378 @@ static UInt disDSPInstr_MIPS_WRK ( UInt cins ) assign(t1, binop(Iop_Shl32, getIReg(rt), mkU8(rd))); - if (31 == rd) { - putIReg(rt, binop(Iop_Or32, - mkexpr(t1), - binop(Iop_And32, - getIReg(rs), - mkU32(0x7fffffff)))); - } else if (1 == rd) { - putIReg(rt, - binop(Iop_Or32, - mkexpr(t1), - binop(Iop_And32, - getIReg(rs), mkU32(0x1)))); - } else { - assign(t2, - unop(Iop_Not32, - binop(Iop_Shl32, - mkU32(0xffffffff), mkU8(rd)))); + if (31 == rd) { + putIReg(rt, binop(Iop_Or32, + mkexpr(t1), + binop(Iop_And32, + getIReg(rs), + mkU32(0x7fffffff)))); + } else if (1 == rd) { + putIReg(rt, + binop(Iop_Or32, + mkexpr(t1), + binop(Iop_And32, + getIReg(rs), mkU32(0x1)))); + } else { + assign(t2, + unop(Iop_Not32, + binop(Iop_Shl32, + mkU32(0xffffffff), mkU8(rd)))); + + putIReg(rt, binop(Iop_Or32, + mkexpr(t1), + binop(Iop_And32, + getIReg(rs), mkexpr(t2)))); + } + break; + } + case 0x1: { /* PREPEND */ + DIP("prepend r%u, r%u, %u", rt, rs, rd); + vassert(!mode64); + t1 = newTemp(Ity_I32); + t2 = newTemp(Ity_I32); + t3 = newTemp(Ity_I32); + + if (0 != rd) { + assign(t1, binop(Iop_Shr32, getIReg(rt), mkU8(rd))); + + if (31 == rd) { + putIReg(rt, binop(Iop_Or32, + mkexpr(t1), + binop(Iop_Shl32, + binop(Iop_And32, + getIReg(rs), + mkU32(0x7fffffff)), + mkU8(1)))); + } else if (1 == rd) { + putIReg(rt, binop(Iop_Or32, + mkexpr(t1), + binop(Iop_Shl32, + binop(Iop_And32, + getIReg(rs), + mkU32(0x1)), + mkU8(31)))); + } else { + assign(t2, binop(Iop_Add32, mkU32(rd), mkU32(0x1))); + + assign(t3, unop(Iop_Not32, + binop(Iop_Shl32, + mkU32(0xffffffff), + unop(Iop_32to8, mkexpr(t2))))); + + putIReg(rt, binop(Iop_Or32, + mkexpr(t1), + binop(Iop_Shl32, + binop(Iop_And32, + getIReg(rs), + mkexpr(t3)), + mkU8(32-rd)))); + } + } + break; + } + case 0x10: { /* BALIGN */ + DIP("balign r%u, r%u, %u", rt, rs, rd); + vassert(!mode64); + t1 = newTemp(Ity_I32); + t2 = newTemp(Ity_I32); 
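+            /* BALIGN concatenates GPR[rt]:GPR[rs] and keeps the four
+               bytes starting at byte offset bp = rd & 3, i.e.
+               rt = (rt << 8*bp) | (rs >> 8*(4-bp)).
+               Worked example, bp = 1: rt = 0xAABBCCDD, rs = 0x11223344
+               gives 0xBBCCDD00 | 0x00000011 = 0xBBCCDD11. */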
+ t3 = newTemp(Ity_I32); + + if ((2 != rd) && (0 != rd)) { + assign(t1, binop(Iop_Shl32, + binop(Iop_And32, + mkU32(rd), mkU32(0x3)), + mkU8(0x3))); + assign(t2, binop(Iop_Shl32, + getIReg(rt), + unop(Iop_32to8, mkexpr(t1)))); + assign(t3, binop(Iop_Shr32, + getIReg(rs), + unop(Iop_32to8, + binop(Iop_Shl32, + binop(Iop_Sub32, + mkU32(0x4), + binop(Iop_And32, + mkU32(rd), + mkU32(0x3))), + mkU8(0x3))))); + putIReg(rt, binop(Iop_Or32, mkexpr(t2), mkexpr(t3))); + } + break; + } + default: + return -1; + } + break; /* end of APPEND */ + } + default: + return -1; + } + break; + } + default: + return -1; + } + return 0; +} + +static Int msa_I8_logical(UInt cins, UChar wd, UChar ws) { + IRTemp t1, t2; + UShort operation; + UChar i8; + + operation = (cins >> 24) & 3; + i8 = (cins & 0x00FF0000) >> 16; + switch (operation) { + case 0x00: { /* ANDI.B */ + DIP("ANDI.B w%d, w%d, %d", wd, ws, i8); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + ULong tmp = i8; + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + putWReg(wd, binop(Iop_AndV128, mkexpr(t1), mkexpr(t2))); + break; + } + + case 0x01: { /* ORI.B */ + DIP("ORI.B w%d, w%d, %d", wd, ws, i8); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + ULong tmp = i8; + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + putWReg(wd, binop(Iop_OrV128, mkexpr(t1), mkexpr(t2))); + break; + } + + case 0x02: { /* NORI.B */ + DIP("NORI.B w%d, w%d, %d", wd, ws, i8); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + ULong tmp = i8; + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + putWReg(wd, unop(Iop_NotV128, binop(Iop_OrV128, + mkexpr(t1), mkexpr(t2)))); + break; + } + + case 0x03: { /* XORI.B */ + DIP("XORI.B w%d, w%d, %d", wd, ws, i8); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + ULong tmp = i8; + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + putWReg(wd, binop(Iop_XorV128, mkexpr(t1), mkexpr(t2))); + break; + } + + default: + return -1; + } + + return 0; +} + +static Int msa_I8_branch(UInt cins, UChar wd, UChar ws) { + IRTemp t1, t2, t3, t4; + UShort operation; + UChar i8; + + operation = (cins >> 24) & 3; + i8 = (cins & 0x00FF0000) >> 16; + switch (operation) { + case 0x00: { /* BMNZI.B */ + DIP("BMNZI.B w%d, w%d, %d", wd, ws, i8); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + ULong tmp = i8; + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t4, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t1, binop(Iop_AndV128, getWReg(ws), mkexpr(t4))); + assign(t2, binop(Iop_AndV128, getWReg(wd), + unop(Iop_NotV128, mkexpr(t4)))); + assign(t3, binop(Iop_OrV128, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* BMZI.B */ + DIP("BMZI.B w%d, w%d, %d", wd, ws, i8); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + ULong tmp = i8; + tmp |= (tmp << 
56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t4, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t1, binop(Iop_AndV128, getWReg(wd), mkexpr(t4))); + assign(t2, binop(Iop_AndV128, getWReg(ws), + unop(Iop_NotV128, mkexpr(t4)))); + assign(t3, binop(Iop_OrV128, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* BSELI.B */ + DIP("BSELI.B w%d, w%d, %d", wd, ws, i8); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + ULong tmp = i8; + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t4, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t1, binop(Iop_AndV128, getWReg(wd), mkexpr(t4))); + assign(t2, binop(Iop_AndV128, getWReg(ws), + unop(Iop_NotV128, getWReg(wd)))); + assign(t3, binop(Iop_OrV128, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + return 0; +} + +static Int msa_I8_shift(UInt cins, UChar wd, UChar ws) { + IRTemp t1, t2; + UShort operation; + UChar i8; + + operation = (cins >> 24) & 3; + i8 = (cins & 0x00FF0000) >> 16; + switch (operation) { + case 0x00: { /* SHF.B */ + DIP("SHF.B w%d, w%d, %d", wd, ws, i8); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, getWReg(wd)); + assign(t2, getWReg(ws)); + Int i; + IRTemp tmp[16]; + + for (i = 0; i < 16; i++) { + tmp[i] = newTemp(Ity_I8); + assign(tmp[i], + binop(Iop_GetElem8x16, mkexpr(t2), + mkU8(i - (i % 4) + + ((i8 >> (i % 4) * 2) & 0x03)))); + } + + putWReg(wd, binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_16HLto32, + binop(Iop_8HLto16, + mkexpr(tmp[15]), + mkexpr(tmp[14])), + binop(Iop_8HLto16, + mkexpr(tmp[13]), + mkexpr(tmp[12]))), + binop(Iop_16HLto32, + binop(Iop_8HLto16, + mkexpr(tmp[11]), + mkexpr(tmp[10])), + binop(Iop_8HLto16, + mkexpr(tmp[9]), + mkexpr(tmp[8])))), + binop(Iop_32HLto64, + binop(Iop_16HLto32, + binop(Iop_8HLto16, + mkexpr(tmp[7]), + mkexpr(tmp[6])), + binop(Iop_8HLto16, + mkexpr(tmp[5]), + mkexpr(tmp[4]))), + binop(Iop_16HLto32, + binop(Iop_8HLto16, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_8HLto16, + mkexpr(tmp[1]), + mkexpr(tmp[0])))))); + break; + } + + case 0x01: { /* SHF.H */ + DIP("SHF.H w%d, w%d, %d", wd, ws, i8); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, getWReg(wd)); + assign(t2, getWReg(ws)); + Int i; + IRTemp tmp[8]; + + for (i = 0; i < 8; i++) { + tmp[i] = newTemp(Ity_I16); + assign(tmp[i], + binop(Iop_GetElem16x8, mkexpr(t2), + mkU8(i - (i % 4) + + ((i8 >> (i % 4) * 2) & 0x03)))); + } + + putWReg(wd, binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_16HLto32, + mkexpr(tmp[7]), mkexpr(tmp[6])), + binop(Iop_16HLto32, + mkexpr(tmp[5]), mkexpr(tmp[4]))), + binop(Iop_32HLto64, + binop(Iop_16HLto32, + mkexpr(tmp[3]), mkexpr(tmp[2])), + binop(Iop_16HLto32, + mkexpr(tmp[1]), mkexpr(tmp[0]))))); + break; + } + + case 0x02: { /* SHF.W */ + DIP("SHF.W w%d, w%d, %d", wd, ws, i8); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, getWReg(wd)); + assign(t2, getWReg(ws)); + Int i; + IRTemp tmp[4]; + + for (i = 0; i < 4; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + binop(Iop_GetElem32x4, mkexpr(t2), + mkU8(i - (i % 4) + + ((i8 >> (i % 4) * 2) & 0x03)))); + } + + putWReg(wd, binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + mkexpr(tmp[3]), mkexpr(tmp[2])), + binop(Iop_32HLto64, + mkexpr(tmp[1]), mkexpr(tmp[0])))); + break; + } + + default: + return -1; + } + + 
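+   /* In the SHF cases above, destination element i receives source
+      element (i & ~3) + ((i8 >> 2*(i & 3)) & 3): i8 packs four 2-bit
+      lane selectors that are reused for every group of four elements.
+      Worked example: SHF.W with i8 = 0x1B (0b00011011) selects source
+      words 3, 2, 1, 0, i.e. it reverses the vector. */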
return 0; +} + +static Int msa_I5_06(UInt cins, UChar wd, UChar ws) { /* I5 (0x06) */ + IRTemp t1, t2, t3; + UShort operation; + UChar df, wt; + + operation = (cins & 0x03800000) >> 23; + df = (cins & 0x00600000) >> 21; + wt = (cins & 0x001F0000) >> 16; + + switch (operation) { + case 0x00: { /* ADDVI */ + ULong tmp = wt; + + switch (df) { + case 0x00: { /* ADDVI.B */ + DIP("ADDVI.B w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Add8x16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* ADDVI.H */ + DIP("ADDVI.H w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Add16x8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* ADDVI.W */ + DIP("ADDVI.W w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + tmp |= (tmp << 32); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Add32x4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* ADDVI.D */ + DIP("ADDVI.D w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Add64x2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + } + + break; + } + + case 0x01: { /* SUBVI */ + ULong tmp = wt; + + switch (df) { + case 0x00: { /* SUBVI.B */ + DIP("SUBVI.B w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Sub8x16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* SUBVI.H */ + DIP("SUBVI.H w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Sub16x8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* SUBVI.W */ + DIP("SUBVI.W w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + tmp |= (tmp << 32); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Sub32x4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* SUBVI.D */ + DIP("SUBVI.D w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Sub64x2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + } + + break; + } + + case 0x02: { /* MAXI_S */ + ULong tmp = wt; 
+ + switch (df) { + case 0x00: { /* MAXI_S.B */ + DIP("MAXI_S.B w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + char stemp = ((int)tmp << 27) >> 27; + tmp = (UChar)stemp; + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t1, getWReg(ws)); + assign(t2,binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Max8Sx16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* MAXI_S.H */ + DIP("MAXI_S.H w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + short stemp = ((int)tmp << 27) >> 27; + tmp = (UShort)stemp; + tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Max16Sx8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* MAXI_S.W */ + DIP("MAXI_S.W w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + int stemp = ((int)tmp << 27) >> 27; + tmp = (UInt)stemp; + tmp |= (tmp << 32); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Max32Sx4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* MAXI_S.D */ + DIP("MAXI_S.D w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + Long stemp = ((Long)tmp << 59) >> 59; + tmp = stemp; + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Max64Sx2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + } + + break; + } + + case 0x03: { /* MAXI_U */ + ULong tmp = wt; + + switch (df) { + case 0x00: { /* MAXI_U.B */ + DIP("MAXI_U.B w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Max8Ux16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* MAXI_U.H */ + DIP("MAXI_U.H w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Max16Ux8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* MAXI_U.W */ + DIP("MAXI_U.W w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + tmp |= (tmp << 32); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Max32Ux4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* MAXI_U.D */ + DIP("MAXI_U.D w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Max64Ux2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + } + + break; + } + + case 0x04: { /* MINI_S */ + ULong tmp = wt; + + switch (df) { + case 0x00: { /* MINI_S.B */ + 
DIP("MINI_S.B w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + char stemp = ((int)tmp << 27) >> 27; + tmp = (UChar)stemp; + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Min8Sx16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* MINI_S.H */ + DIP("MINI_S.H w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + short stemp = ((int)tmp << 27) >> 27; + tmp = (UShort)stemp; + tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Min16Sx8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* MINI_S.W */ + DIP("MINI_S.W w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + int stemp = ((int)tmp << 27) >> 27; + tmp = (UInt)stemp; + tmp |= (tmp << 32); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Min32Sx4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* MINI_S.D */ + DIP("MINI_S.D w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + Long stemp = ((Long)tmp << 59) >> 59; + tmp = stemp; + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Min64Sx2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + } + + break; + } + + case 0x05: { /* MINI_U */ + ULong tmp = wt; + + switch (df) { + case 0x00: { /* MINI_U.B */ + DIP("MINI_U.B w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Min8Ux16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* MINI_U.H */ + DIP("MINI_U.H w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Min16Ux8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* MINI_U.W */ + DIP("MINI_U.W w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + tmp |= (tmp << 32); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Min32Ux4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* MINI_U.D */ + DIP("MINI_U.D w%d, w%d, %d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_Min64Ux2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + } + + break; + } + + default: { + return -1; + } + } + + return 0; +} + +static Int msa_I5_07(UInt cins, UChar wd, UChar ws) { /* I5 (0x07) / I10 */ + IRTemp t1, t2, t3; + 
UShort operation; + UChar df, i5; + + operation = (cins & 0x03800000) >> 23; + df = (cins & 0x00600000) >> 21; + i5 = (cins & 0x001F0000) >> 16; + + switch (operation) { + case 0x00: { + ULong tmp = i5; + + switch (df) { + case 0x00: { /* CEQI.B */ + DIP("CEQI.B w%d, w%d, %d", wd, ws, i5); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + char stemp = ((int)tmp << 27) >> 27; + tmp = (UChar)stemp; + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_CmpEQ8x16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* CEQI.H */ + DIP("CEQI.H w%d, w%d, %d", wd, ws, i5); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + short stemp = ((int)tmp << 27) >> 27; + tmp = (UShort)stemp; + tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_CmpEQ16x8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* CEQI.W */ + DIP("CEQI.W w%d, w%d, %d", wd, ws, i5); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + int stemp = ((int)tmp << 27) >> 27; + tmp = (UInt)stemp; + tmp |= (tmp << 32); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_CmpEQ32x4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* CEQI.D */ + DIP("CEQI.D w%d, w%d, %d", wd, ws, i5); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + Long stemp = ((Long)tmp << 59) >> 59; + tmp = stemp; + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_CmpEQ64x2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + } + + break; + } + + case 0x02: { /* CLTI_S.df */ + ULong tmp = i5; + + switch (df) { + case 0x00: { /* CLTI_S.B */ + DIP("CLTI_S.B w%d, w%d, %d", wd, ws, i5); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + char stemp = ((int)tmp << 27) >> 27; + tmp = (UChar)stemp; + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_CmpGT8Sx16, mkexpr(t2), mkexpr(t1))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* CLTI_S.H */ + DIP("CLTI_S.H w%d, w%d, %d", wd, ws, i5); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + short stemp = ((int)tmp << 27) >> 27; + tmp = (UShort)stemp; + tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_CmpGT16Sx8, mkexpr(t2), mkexpr(t1))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* CLTI_S.W */ + DIP("CLTI_S.W w%d, w%d, %d", wd, ws, i5); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + int stemp = ((int)tmp << 27) >> 27; + tmp = (UInt)stemp; + tmp |= (tmp << 32); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_CmpGT32Sx4, mkexpr(t2), mkexpr(t1))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* CLTI_S.D */ + DIP("CLTI_S.D w%d, w%d, %d", wd, ws, i5); + t1 = 
newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + Long stemp = ((Long)tmp << 59) >> 59; + tmp = stemp; + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_CmpGT64Sx2, mkexpr(t2), mkexpr(t1))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x03: { /* CLTI_U.df */ + ULong tmp = i5; + + switch (df) { + case 0x00: { /* CLTI_U.B */ + DIP("CLTI_U.B w%d, w%d, %d", wd, ws, i5); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_CmpGT8Ux16, mkexpr(t2), mkexpr(t1))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* CLTI_U.H */ + DIP("CLTI_U.H w%d, w%d, %d", wd, ws, i5); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_CmpGT16Ux8, mkexpr(t2), mkexpr(t1))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* CLTI_U.W */ + DIP("CLTI_U.W w%d, w%d, %d", wd, ws, i5); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + tmp |= (tmp << 32); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_CmpGT32Ux4, mkexpr(t2), mkexpr(t1))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* CLTI_U.D */ + DIP("CLTI_U.D w%d, w%d, %d", wd, ws, i5); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_CmpGT64Ux2, mkexpr(t2), mkexpr(t1))); + putWReg(wd, mkexpr(t3)); + break; + } + } + + break; + } + + case 0x04: { /* CLEI_S.df */ + ULong tmp = i5; + + switch (df) { + case 0x00: { /* CLEI_S.B */ + DIP("CLEI_S.B w%d, w%d, %d", wd, ws, i5); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + char stemp = ((int)tmp << 27) >> 27; + tmp = (UChar)stemp; + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_OrV128, binop(Iop_CmpGT8Sx16, + mkexpr(t2), mkexpr(t1)), + binop(Iop_CmpEQ8x16, + mkexpr(t1), mkexpr(t2)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* CLEI_S.H */ + DIP("CLEI_S.H w%d, w%d, %d", wd, ws, i5); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + short stemp = ((int)tmp << 27) >> 27; + tmp = (UShort)stemp; + tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_OrV128, binop(Iop_CmpGT16Sx8, + mkexpr(t2), mkexpr(t1)), + binop(Iop_CmpEQ16x8, + mkexpr(t1), mkexpr(t2)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* CLEI_S.W */ + DIP("CLEI_S.W w%d, w%d, %d", wd, ws, i5); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + int stemp = ((int)tmp << 27) >> 27; + tmp = (UInt)stemp; + tmp |= (tmp << 32); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_OrV128, + binop(Iop_CmpGT32Sx4, 
+ mkexpr(t2), mkexpr(t1)), + binop(Iop_CmpEQ32x4, + mkexpr(t1), mkexpr(t2)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* CLEI_S.D */ + DIP("CLEI_S.D w%d, w%d, %d", wd, ws, i5); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + Long stemp = ((Long)tmp << 59) >> 59; + tmp = stemp; + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_OrV128, + binop(Iop_CmpGT64Sx2, + mkexpr(t2), mkexpr(t1)), + binop(Iop_CmpEQ64x2, + mkexpr(t1), mkexpr(t2)))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x05: { /* CLEI_U.df */ + ULong tmp = i5; + + switch (df) { + case 0x00: { /* CLEI_U.B */ + DIP("CLEI_U.B w%d, w%d, %d", wd, ws, i5); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_OrV128, + binop(Iop_CmpGT8Ux16, + mkexpr(t2), mkexpr(t1)), + binop(Iop_CmpEQ8x16, + mkexpr(t1), mkexpr(t2)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* CLEI_U.H */ + DIP("CLEI_U.H w%d, w%d, %d", wd, ws, i5); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_OrV128, + binop(Iop_CmpGT16Ux8, + mkexpr(t2), mkexpr(t1)), + binop(Iop_CmpEQ16x8, + mkexpr(t1), mkexpr(t2)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* CLEI_U.W */ + DIP("CLEI_U.W w%d, w%d, %d", wd, ws, i5); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + tmp |= (tmp << 32); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_OrV128, + binop(Iop_CmpGT32Ux4, + mkexpr(t2), mkexpr(t1)), + binop(Iop_CmpEQ32x4, + mkexpr(t1), mkexpr(t2)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* CLEI_U.D */ + DIP("CLEI_U.D w%d, w%d, %d", wd, ws, i5); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + assign(t3, binop(Iop_OrV128, + binop(Iop_CmpGT64Ux2, + mkexpr(t2), mkexpr(t1)), + binop(Iop_CmpEQ64x2, + mkexpr(t1), mkexpr(t2)))); + putWReg(wd, mkexpr(t3)); + break; + } + } + + break; + } + + case 0x06: { /* LDI.df */ + ULong tmp; + UShort s10; + s10 = (cins & 0x001FF800) >> 11; + switch (df) { + case 0x00: /* LDI.B */ + DIP("LDI.B w%d, %d", wd, s10); + tmp = s10 & 0xFFl; + tmp = tmp | (tmp << 8) | (tmp << 16) | (tmp << 24) + | (tmp << 32) | (tmp << 40) | (tmp << 48) | + (tmp << 56); + break; + + case 0x01: /* LDI.H */ + DIP("LDI.H w%d, %d", wd, s10); + tmp = extend_s_10to16(s10); + tmp = tmp | (tmp << 16) | (tmp << 32) | (tmp << 48); + break; + + case 0x02: /* LDI.W */ + DIP("LDI.W w%d, %d", wd, s10); + tmp = extend_s_10to32(s10); + tmp = tmp | (tmp << 32); + break; + + case 0x03: /* LDI.D */ + DIP("LDI.D w%d, %d", wd, s10); + tmp = extend_s_10to64(s10); + break; + + default: + return -1; + } + + putWReg(wd, binop(Iop_64HLtoV128, mkU64(tmp), mkU64(tmp))); + break; + } + + default: + return -1; + } + + return 0; +} + +static Int msa_BIT_09(UInt cins, UChar wd, UChar ws) { /* BIT (0x09) */ + IRTemp t1, t2, t3; + UShort operation; + UChar df, m; + + operation 
= (cins & 0x03800000) >> 23; + df = (cins & 0x007F0000) >> 16; + + if ((df & 0x70) == 0x70) { // 111mmmm; b + m = df & 0x07; + df = 0; + } else if ((df & 0x60) == 0x60) { // 110mmmm; h + m = df & 0x0F; + df = 1; + } else if ((df & 0x40) == 0x40) { // 10mmmmm; w + m = df & 0x1F; + df = 2; + } else if ((df & 0x00) == 0x00) { // 0mmmmmm; d + m = df & 0x3F; + df = 3; + } + + switch (operation) { + case 0x00: { /* SLLI.df */ + switch (df) { + case 0x00: { /* SLLI.B */ + DIP("SLLI.B w%d, w%d, %d", wd, ws, m); + putWReg(wd, binop(Iop_ShlN8x16, getWReg(ws), mkU8(m))); + break; + } + + case 0x01: { /* SLLI.H */ + DIP("SLLI.H w%d, w%d, %d", wd, ws, m); + putWReg(wd, binop(Iop_ShlN16x8, getWReg(ws), mkU8(m))); + break; + } + + case 0x02: { /* SLLI.W */ + DIP("SLLI.W w%d, w%d, %d", wd, ws, m); + putWReg(wd, binop(Iop_ShlN32x4, getWReg(ws), mkU8(m))); + break; + } + + case 0x03: { /* SLLI.D */ + DIP("SLLI.D w%d, w%d, %d", wd, ws, m); + putWReg(wd, binop(Iop_ShlN64x2, getWReg(ws), mkU8(m))); + break; + } + } + + break; + } + + case 0x01: { /* SRAI.df */ + switch (df) { + case 0x00: { /* SRAI.B */ + DIP("SRAI.B w%d, w%d, %d", wd, ws, m); + putWReg(wd, binop(Iop_SarN8x16, getWReg(ws), mkU8(m))); + break; + } + + case 0x01: { /* SRAI.H */ + DIP("SRAI.H w%d, w%d, %d", wd, ws, m); + putWReg(wd, binop(Iop_SarN16x8, getWReg(ws), mkU8(m))); + break; + } + + case 0x02: { /* SRAI.W */ + DIP("SRAI.W w%d, w%d, %d", wd, ws, m); + putWReg(wd, binop(Iop_SarN32x4, getWReg(ws), mkU8(m))); + break; + } + + case 0x03: { /* SRAI.D */ + DIP("SRAI.D w%d, w%d, %d", wd, ws, m); + putWReg(wd, binop(Iop_SarN64x2, getWReg(ws), mkU8(m))); + break; + } + } + + break; + } + + case 0x02: { /* SRLI.df */ + switch (df) { + case 0x00: { /* SRLI.B */ + DIP("SRLI.B w%d, w%d, %d", wd, ws, m); + putWReg(wd, binop(Iop_ShrN8x16, getWReg(ws), mkU8(m))); + break; + } + + case 0x01: { /* SRLI.H */ + DIP("SRLI.H w%d, w%d, %d", wd, ws, m); + putWReg(wd, binop(Iop_ShrN16x8, getWReg(ws), mkU8(m))); + break; + } + + case 0x02: { /* SRLI.W */ + DIP("SRLI.W w%d, w%d, %d", wd, ws, m); + putWReg(wd, binop(Iop_ShrN32x4, getWReg(ws), mkU8(m))); + break; + } + + case 0x03: { /* SRLI.D */ + DIP("SRLI.D w%d, w%d, %d", wd, ws, m); + putWReg(wd, binop(Iop_ShrN64x2, getWReg(ws), mkU8(m))); + break; + } + } + + break; + } + + case 0x03: { /* BCLRI.df */ + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = 1; + assign(t1, getWReg(ws)); + + switch (df) { + case 0x00: { /* BCLRI.B */ + DIP("BCLRI.B w%d, w%d, %d", wd, ws, m); + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t2, binop(Iop_ShlN8x16, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)),mkU8(m))); + break; + } + + case 0x01: { /* BCLRI.H */ + DIP("BCLRI.H w%d, w%d, %d", wd, ws, m); + tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16); + assign(t2, binop(Iop_ShlN16x8, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), mkU8(m))); + break; + } + + case 0x02: { /* BCLRI.W */ + DIP("BCLRI.W w%d, w%d, %d", wd, ws, m); + tmp |= (tmp << 32); + assign(t2, binop(Iop_ShlN32x4, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), mkU8(m))); + break; + } + + case 0x03: { /* BCLRI.D */ + DIP("BCLRI.D w%d, w%d, %d", wd, ws, m); + assign(t2, binop(Iop_ShlN64x2, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), mkU8(m))); + break; + } + } + + assign(t3, binop(Iop_AndV128, + mkexpr(t1), unop(Iop_NotV128, mkexpr(t2)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x04: { /* BSETI */ + t1 = newTemp(Ity_V128); + t2 = 
newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = 1; + assign(t1, getWReg(ws)); + + switch (df) { + case 0x00: { /* BSETI.B */ + DIP("BSETI.B w%d, w%d, %d", wd, ws, m); + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t2, + binop(Iop_ShlN8x16, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), mkU8(m))); + break; + } + + case 0x01: { /* BSETI.H */ + DIP("BSETI.H w%d, w%d, %d", wd, ws, m); + tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16); + assign(t2, + binop(Iop_ShlN16x8, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), mkU8(m))); + break; + } + + case 0x02: { /* BSETI.W */ + DIP("BSETI.W w%d, w%d, %d", wd, ws, m); + tmp |= (tmp << 32); + assign(t2, + binop(Iop_ShlN32x4, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), mkU8(m))); + break; + } + + case 0x03: { /* BSETI.D */ + DIP("BSETI.D w%d, w%d, %d", wd, ws, m); + assign(t2, + binop(Iop_ShlN64x2, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), mkU8(m))); + break; + } + } + + assign(t3, binop(Iop_OrV128, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x05: { /* BNEGI.df */ + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = 1; + assign(t1, getWReg(ws)); + + switch (df) { + case 0x00: { /* BNEGI.B */ + DIP("BNEGI.B w%d, w%d, %d", wd, ws, m); + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t2, + binop(Iop_ShlN8x16, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), mkU8(m))); + break; + } + + case 0x01: { /* BNEGI.H */ + DIP("BNEGI.H w%d, w%d, %d", wd, ws, m); + tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16); + assign(t2, + binop(Iop_ShlN16x8, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), mkU8(m))); + break; + } + + case 0x02: { /* BNEGI.W */ + DIP("BNEGI.W w%d, w%d, %d", wd, ws, m); + tmp |= (tmp << 32); + assign(t2, + binop(Iop_ShlN32x4, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), mkU8(m))); + break; + } + + case 0x03: { /* BNEGI.D */ + DIP("BNEGI.D w%d, w%d, %d", wd, ws, m); + assign(t2, + binop(Iop_ShlN64x2, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), mkU8(m))); + break; + } + } + + assign(t3, binop(Iop_XorV128, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x06: { /* BINSLI.df */ + switch (df) { + case 0x00: { /* BINSLI.B */ + DIP("BINSLI.B w%d, w%d, %d", wd, ws, m); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = 0x8080808080808080ULL; + assign(t1, binop(Iop_SarN8x16, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), mkU8(m))); + assign(t2, + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t1)), getWReg(wd))); + assign(t3, + binop(Iop_AndV128, + mkexpr(t1), getWReg(ws))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t2), mkexpr(t3))); + break; + } + + case 0x01: { /* BINSLI.H */ + DIP("BINSLI.H w%d, w%d, %d", wd, ws, m); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = 0x8000800080008000ULL; + assign(t1, + binop(Iop_SarN16x8, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), mkU8(m))); + assign(t2, + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t1)), getWReg(wd))); + assign(t3, + binop(Iop_AndV128, + mkexpr(t1), getWReg(ws))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t2), mkexpr(t3))); + break; + } + + case 0x02: { /* BINSLI.W */ + DIP("BINSLI.W w%d, w%d, %d", wd, ws, m); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = 0x8000000080000000ULL; + 
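+                     /* BINSLI inserts the high (m+1) bits of each ws lane into wd.
+                        Arithmetically right-shifting the per-lane sign-bit pattern by m
+                        replicates it into the top m+1 positions, giving the insert mask
+                        t1; t2 keeps the wd bits outside the mask, t3 takes the ws bits
+                        inside it. */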
assign(t1, + binop(Iop_SarN32x4, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), mkU8(m))); + assign(t2, + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t1)), getWReg(wd))); + assign(t3, + binop(Iop_AndV128, + mkexpr(t1), getWReg(ws))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t2), mkexpr(t3))); + break; + } + + case 0x03: { /* BINSLI.D */ + DIP("BINSLI.D w%d, w%d, %d", wd, ws, m); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = 0x8000000000000000ULL; + assign(t1, + binop(Iop_SarN64x2, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), mkU8(m))); + assign(t2, + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t1)), getWReg(wd))); + assign(t3, + binop(Iop_AndV128, + mkexpr(t1), getWReg(ws))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t2), mkexpr(t3))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x07: { /* BINSRI.df */ + switch (df) { + case 0x00: { /* BINSRI.B */ + DIP("BINSRI.B w%d, w%d, %d", wd, ws, m); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = 0xFEFEFEFEFEFEFEFEULL; + assign(t1, + binop(Iop_ShlN8x16, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), mkU8(m))); + assign(t2, + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t1)), getWReg(ws))); + assign(t3, + binop(Iop_AndV128, + mkexpr(t1), getWReg(wd))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t2), mkexpr(t3))); + break; + } + + case 0x01: { /* BINSRI.H */ + DIP("BINSRI.H w%d, w%d, %d", wd, ws, m); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = 0xFFFEFFFEFFFEFFFEULL; + assign(t1, + binop(Iop_ShlN16x8, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + mkU8(m))); + assign(t2, + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t1)), + getWReg(ws))); + assign(t3, + binop(Iop_AndV128, + mkexpr(t1), getWReg(wd))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t2), mkexpr(t3))); + break; + } + + case 0x02: { /* BINSRI.W */ + DIP("BINSRI.W w%d, w%d, %d", wd, ws, m); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = 0xFFFFFFFEFFFFFFFEULL; + assign(t1, + binop(Iop_ShlN32x4, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + mkU8(m))); + assign(t2, + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t1)), + getWReg(ws))); + assign(t3, + binop(Iop_AndV128, + mkexpr(t1), getWReg(wd))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t2), mkexpr(t3))); + break; + } + + case 0x03: { /* BINSRI.D */ + DIP("BINSRI.D w%d, w%d, %d", wd, ws, m); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = -2; + assign(t1, + binop(Iop_ShlN64x2, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + mkU8(m))); + assign(t2, + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t1)), + getWReg(ws))); + assign(t3, + binop(Iop_AndV128, + mkexpr(t1), getWReg(wd))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t2), mkexpr(t3))); + break; + } + + default: + return -1; + } + + break; + } + + default: + return -1; + } + + return 0; +} + +static Int msa_BIT_0A(UInt cins, UChar wd, UChar ws) { /* BIT (0x0A) */ + IRTemp t1, t2; + UShort operation; + UChar df, m; + + operation = (cins & 0x03800000) >> 23; + df = (cins & 0x007F0000) >> 16; + + if ((df & 0x70) == 0x70) { // 111mmmm; b + m = df & 0x07; + df = 0; + } else if ((df & 0x60) == 0x60) { // 110mmmm; h + m = df & 0x0F; + df = 1; + } else if ((df & 0x40) == 0x40) { // 10mmmmm; w + m = df & 0x1F; + df = 2; + } else if ((df & 0x00) == 0x00) { // 0mmmmmm; d + m = df & 0x3F; + df = 3; + } + + switch (operation) { + 
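+      /* The 7-bit df/m field decoded above packs both the data format and the
+         bit count m: leading bits 111 select .b (3-bit m), 110 select .h
+         (4-bit m), 10 select .w (5-bit m), and 0 selects .d (6-bit m).
+         The SAT_S/SAT_U cases below clamp each lane to a signed or unsigned
+         (m+1)-bit range using only shifts and compares. */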
case 0x00: { /* SAT_S.df */ + switch (df) { + case 0x00: { /* SAT_S.B */ + DIP("SAT_S.B w%d, w%d, %d", wd, ws, m); + t1 = newTemp(Ity_V128); + assign(t1, binop(Iop_SarN8x16, getWReg(ws), mkU8(7))); + + if (m == 0) { + putWReg(wd, mkexpr(t1)); + } else { + t2 = newTemp(Ity_V128); + assign(t2, + binop(Iop_SarN8x16, getWReg(ws), mkU8(m))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_OrV128, + binop(Iop_AndV128, + binop(Iop_CmpEQ8x16, + mkexpr(t1), + mkexpr(t2)), + getWReg(ws)), + binop(Iop_ShlN8x16, + binop(Iop_CmpGT8Sx16, + mkexpr(t1), + mkexpr(t2)), + mkU8(m))), + binop(Iop_ShrN8x16, + binop(Iop_CmpGT8Sx16, + mkexpr(t2), + mkexpr(t1)), + mkU8(8 - m)))); + } + + break; + } + + case 0x01: { /* SAT_S.H */ + DIP("SAT_S.H w%d, w%d, %d", wd, ws, m); + t1 = newTemp(Ity_V128); + assign(t1, binop(Iop_SarN16x8, getWReg(ws), mkU8(15))); + + if (m == 0) { + putWReg(wd, mkexpr(t1)); + } else { + t2 = newTemp(Ity_V128); + assign(t2, + binop(Iop_SarN16x8, + getWReg(ws), + mkU8(m))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_OrV128, + binop(Iop_AndV128, + binop(Iop_CmpEQ16x8, + mkexpr(t1), + mkexpr(t2)), + getWReg(ws)), + binop(Iop_ShlN16x8, + binop(Iop_CmpGT16Sx8, + mkexpr(t1), + mkexpr(t2)), + mkU8(m))), + binop(Iop_ShrN16x8, + binop(Iop_CmpGT16Sx8, + mkexpr(t2), + mkexpr(t1)), + mkU8(16 - m)))); + } + + break; + } + + case 0x02: { /* SAT_S.W */ + DIP("SAT_S.W w%d, w%d, %d", wd, ws, m); + t1 = newTemp(Ity_V128); + assign(t1, binop(Iop_SarN32x4, getWReg(ws), mkU8(31))); + + if (m == 0) { + putWReg(wd, mkexpr(t1)); + } else { + t2 = newTemp(Ity_V128); + assign(t2, + binop(Iop_SarN32x4, + getWReg(ws), + mkU8(m))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_OrV128, + binop(Iop_AndV128, + binop(Iop_CmpEQ32x4, + mkexpr(t1), + mkexpr(t2)), + getWReg(ws)), + binop(Iop_ShlN32x4, + binop(Iop_CmpGT32Sx4, + mkexpr(t1), + mkexpr(t2)), + mkU8(m))), + binop(Iop_ShrN32x4, + binop(Iop_CmpGT32Sx4, + mkexpr(t2), + mkexpr(t1)), + mkU8(32 - m)))); + } + + break; + } + + case 0x03: { /* SAT_S.D */ + DIP("SAT_S.D w%d, w%d, %d", wd, ws, m); + t1 = newTemp(Ity_V128); + assign(t1, binop(Iop_SarN64x2, getWReg(ws), mkU8(63))); + + if (m == 0) { + putWReg(wd, mkexpr(t1)); + } else { + t2 = newTemp(Ity_V128); + assign(t2, + binop(Iop_SarN64x2, + getWReg(ws), + mkU8(m))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_OrV128, + binop(Iop_AndV128, + binop(Iop_CmpEQ64x2, + mkexpr(t1), + mkexpr(t2)), + getWReg(ws)), + binop(Iop_ShlN64x2, + binop(Iop_CmpGT64Sx2, + mkexpr(t1), + mkexpr(t2)), + mkU8(m))), + binop(Iop_ShrN64x2, + binop(Iop_CmpGT64Sx2, + mkexpr(t2), + mkexpr(t1)), + mkU8(64 - m)))); + } + + break; + } + } + + break; + } + + case 0x01: { /* SAT_U.df */ + switch (df) { + case 0x00: { /* SAT_U.B */ + DIP("SAT_U.B w%d, w%d, %d", wd, ws, m); + + if (m == 7) { + putWReg(wd, getWReg(ws)); + } else { + t1 = newTemp(Ity_V128); + assign(t1, + binop(Iop_CmpEQ8x16, + binop(Iop_ShrN8x16, + getWReg(ws), + mkU8(m + 1)), + binop(Iop_64HLtoV128, + mkU64(0), mkU64(0)))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t1), + getWReg(ws)), + binop(Iop_ShrN8x16, + unop(Iop_NotV128, + mkexpr(t1)), + mkU8(7 - m)))); + } + + break; + } + + case 0x01: { /* SAT_U.H */ + DIP("SAT_U.H w%d, w%d, %d", wd, ws, m); + + if (m == 15) { + putWReg(wd, getWReg(ws)); + } else { + t1 = newTemp(Ity_V128); + assign(t1, + binop(Iop_CmpEQ16x8, + binop(Iop_ShrN16x8, + getWReg(ws), + mkU8(m + 1)), + binop(Iop_64HLtoV128, + mkU64(0), mkU64(0)))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t1), + getWReg(ws)), + 
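+                              /* lanes that do not fit in (m+1) bits saturate to
+                                 the unsigned maximum 2^(m+1)-1, i.e. the all-ones
+                                 complement of t1 shifted right by (15 - m) */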
binop(Iop_ShrN16x8, + unop(Iop_NotV128, + mkexpr(t1)), + mkU8(15 - m)))); + } + + break; + } + + case 0x02: { /* SAT_U.W */ + DIP("SAT_U.W w%d, w%d, %d", wd, ws, m); + + if (m == 31) { + putWReg(wd, getWReg(ws)); + } else { + t1 = newTemp(Ity_V128); + assign(t1, + binop(Iop_CmpEQ32x4, + binop(Iop_ShrN32x4, + getWReg(ws), + mkU8(m + 1)), + binop(Iop_64HLtoV128, + mkU64(0), mkU64(0)))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t1), + getWReg(ws)), + binop(Iop_ShrN32x4, + unop(Iop_NotV128, + mkexpr(t1)), + mkU8(31 - m)))); + } + + break; + } + + case 0x03: { /* SAT_U.D */ + DIP("SAT_U.D w%d, w%d, %d", wd, ws, m); + + if (m == 63) { + putWReg(wd, getWReg(ws)); + } else { + t1 = newTemp(Ity_V128); + assign(t1, + binop(Iop_CmpEQ64x2, + binop(Iop_ShrN64x2, + getWReg(ws), + mkU8(m + 1)), + binop(Iop_64HLtoV128, + mkU64(0), mkU64(0)))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t1), + getWReg(ws)), + binop(Iop_ShrN64x2, + unop(Iop_NotV128, + mkexpr(t1)), + mkU8(63 - m)))); + } + + break; + } + } + + break; + } + + case 0x02: { /* SRARI.df */ + switch (df) { + case 0x00: { /* SRARI.B */ + DIP("SRARI.B w%d, w%d, %d", wd, ws, m); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, + binop(Iop_SarN8x16, + getWReg(ws), + mkU8(m))); + assign(t2, + binop(Iop_ShrN8x16, + binop(Iop_ShlN8x16, + getWReg(ws), + mkU8(8 - m)), + mkU8(7))); + + if (m) putWReg(wd, binop(Iop_Add8x16, + mkexpr(t1), + mkexpr(t2))); + else putWReg(wd, mkexpr(t1)); + + break; + } + + case 0x01: { /* SRARI.H */ + DIP("SRARI.H w%d, w%d, %d", wd, ws, m); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, + binop(Iop_SarN16x8, + getWReg(ws), + mkU8(m))); + assign(t2, + binop(Iop_ShrN16x8, + binop(Iop_ShlN16x8, + getWReg(ws), + mkU8(16 - m)), + mkU8(15))); + + if (m) + putWReg(wd, + binop(Iop_Add16x8, + mkexpr(t1), mkexpr(t2))); + else putWReg(wd, mkexpr(t1)); + + break; + } + + case 0x02: { /* SRARI.W */ + DIP("SRARI.W w%d, w%d, %d", wd, ws, m); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, + binop(Iop_SarN32x4, + getWReg(ws), + mkU8(m))); + assign(t2, + binop(Iop_ShrN32x4, + binop(Iop_ShlN32x4, + getWReg(ws), + mkU8(32 - m)), + mkU8(31))); + + if (m) + putWReg(wd, + binop(Iop_Add32x4, + mkexpr(t1), mkexpr(t2))); + else putWReg(wd, mkexpr(t1)); + + break; + } + + case 0x03: { /* SRARI.D */ + DIP("SRARI.D w%d, w%d, %d", wd, ws, m); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, + binop(Iop_SarN64x2, + getWReg(ws), + mkU8(m))); + assign(t2, + binop(Iop_ShrN64x2, + binop(Iop_ShlN64x2, + getWReg(ws), + mkU8(64 - m)), + mkU8(63))); + + if (m) + putWReg(wd, + binop(Iop_Add64x2, + mkexpr(t1), mkexpr(t2))); + else putWReg(wd, mkexpr(t1)); + + break; + } + } + + break; + } + + case 0x03: { /* SRLRI.df */ + switch (df) { + case 0x00: { /* SRLRI.B */ + DIP("SRLRI.B w%d, w%d, %d", wd, ws, m); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, + binop(Iop_ShrN8x16, + getWReg(ws), + mkU8(m))); + assign(t2, + binop(Iop_ShrN8x16, + binop(Iop_ShlN8x16, + getWReg(ws), + mkU8(8 - m)), + mkU8(7))); + + if (m) + putWReg(wd, + binop(Iop_Add8x16, + mkexpr(t1), mkexpr(t2))); + else putWReg(wd, mkexpr(t1)); + + break; + } + + case 0x01: { /* SRLRI.H */ + DIP("SRLRI.H w%d, w%d, %d", wd, ws, m); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, + binop(Iop_ShrN16x8, + getWReg(ws), + mkU8(m))); + assign(t2, + binop(Iop_ShrN16x8, + binop(Iop_ShlN16x8, + getWReg(ws), + mkU8(16 - m)), + mkU8(15))); + + if (m) + putWReg(wd, + 
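+                           /* SRARI/SRLRI round the shift: t2 holds bit (m-1) of
+                              each source lane, extracted as (x << (16-m)) >>u 15,
+                              and is added to the truncated shift result */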
binop(Iop_Add16x8, + mkexpr(t1), mkexpr(t2))); + else putWReg(wd, mkexpr(t1)); + + break; + } + + case 0x02: { /* SRLRI.W */ + DIP("SRLRI.W w%d, w%d, %d", wd, ws, m); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, + binop(Iop_ShrN32x4, + getWReg(ws), + mkU8(m))); + assign(t2, + binop(Iop_ShrN32x4, + binop(Iop_ShlN32x4, + getWReg(ws), + mkU8(32 - m)), + mkU8(31))); + + if (m) + putWReg(wd, + binop(Iop_Add32x4, + mkexpr(t1), mkexpr(t2))); + else putWReg(wd, mkexpr(t1)); + + break; + } + + case 0x03: { /* SRLRI.D */ + DIP("SRLRI.D w%d, w%d, %d", wd, ws, m); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, + binop(Iop_ShrN64x2, + getWReg(ws), + mkU8(m))); + assign(t2, + binop(Iop_ShrN64x2, + binop(Iop_ShlN64x2, + getWReg(ws), + mkU8(64 - m)), + mkU8(63))); + + if (m) + putWReg(wd, + binop(Iop_Add64x2, + mkexpr(t1), mkexpr(t2))); + else putWReg(wd, mkexpr(t1)); + + break; + } + } + + break; + } + + default: + return -1; + } + + return 0; +} + +static Int msa_3R_0D(UInt cins, UChar wd, UChar ws) { /* 3R (0x0D) */ + IRTemp t1, t2, t3; + UShort operation; + UChar df, wt; + + operation = (cins & 0x03800000) >> 23; + df = (cins & 0x00600000) >> 21; + wt = (cins & 0x001F0000) >> 16; + + switch (operation) { + case 0x00: { /* SLL.df */ + switch (df) { + case 0x00: { /* SLL.B */ + DIP("SLL.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Shl8x16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* SLL.H */ + DIP("SLL.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Shl16x8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* SLL.W */ + DIP("SLL.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Shl32x4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* SLL.D */ + DIP("SLL.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Shl64x2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x01: { /* SRA.df */ + switch (df) { + case 0x00: { /* SRA.B */ + DIP("SRA.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Sar8x16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* SRA.H */ + DIP("SRA.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Sar16x8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* SRA.W */ + DIP("SRA.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Sar32x4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* SRA.D */ + DIP("SRA.D w%d, w%d, w%d", wd, ws, 
wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Sar64x2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x02: { /* SRL.df */ + switch (df) { + case 0x00: { /* SRL.B */ + DIP("SRL.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Shr8x16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* SRL.H */ + DIP("SRL.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Shr16x8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* SRL.W */ + DIP("SRL.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Shr32x4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* SRL.D */ + DIP("SRL.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Shr64x2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x03: { /* BCLR.df */ + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = 1; + assign(t1, getWReg(ws)); + + switch (df) { + case 0x00: { /* BCLR.B */ + DIP("BCLR.B w%d, w%d, w%d", wd, ws, wt); + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t2, binop(Iop_Shl8x16, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + getWReg(wt))); + break; + } + + case 0x01: { /* BCLR.H */ + DIP("BCLR.H w%d, w%d, w%d", wd, ws, wt); + tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16); + assign(t2, + binop(Iop_Shl16x8, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + getWReg(wt))); + break; + } + + case 0x02: { /* BCLR.W */ + DIP("BCLR.W w%d, w%d, w%d", wd, ws, wt); + tmp |= (tmp << 32); + assign(t2, + binop(Iop_Shl32x4, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + getWReg(wt))); + break; + } + + case 0x03: { /* BCLR.D */ + DIP("BCLR.D w%d, w%d, w%d", wd, ws, wt); + assign(t2, + binop(Iop_Shl64x2, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + getWReg(wt))); + break; + } + } + + assign(t3, + binop(Iop_AndV128, + mkexpr(t1), unop(Iop_NotV128, mkexpr(t2)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x04: { /* BSET.df */ + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = 1; + assign(t1, getWReg(ws)); + + switch (df) { + case 0x00: { /* BSET.B */ + DIP("BSET.B w%d, w%d, w%d", wd, ws, wt); + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t2, + binop(Iop_Shl8x16, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + getWReg(wt))); + break; + } + + case 0x01: { /* BSET.H */ + DIP("BSET.H w%d, w%d, w%d", wd, ws, wt); + tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16); + assign(t2, + binop(Iop_Shl16x8, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + getWReg(wt))); + break; + } + + case 0x02: { /* BSET.W */ + DIP("BSET.W w%d, 
w%d, w%d", wd, ws, wt); + tmp |= (tmp << 32); + assign(t2, + binop(Iop_Shl32x4, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + getWReg(wt))); + break; + } + + case 0x03: { /* BSET.D */ + DIP("BSET.D w%d, w%d, w%d", wd, ws, wt); + assign(t2, + binop(Iop_Shl64x2, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + getWReg(wt))); + break; + } + } + + assign(t3, binop(Iop_OrV128, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x05: { /* BNEG.df */ + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = 1; + assign(t1, getWReg(ws)); + + switch (df) { + case 0x00: { /* BNEG.B */ + DIP("BNEG.B w%d, w%d, w%d", wd, ws, wt); + tmp |= (tmp << 56) | (tmp << 48) | (tmp << 40) | + (tmp << 32) | (tmp << 24) | (tmp << 16) | + (tmp << 8); + assign(t2, + binop(Iop_Shl8x16, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + getWReg(wt))); + break; + } + + case 0x01: { /* BNEG.H */ + DIP("BNEG.H w%d, w%d, w%d", wd, ws, wt); + tmp |= (tmp << 48) | (tmp << 32) | (tmp << 16); + assign(t2, + binop(Iop_Shl16x8, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + getWReg(wt))); + break; + } + + case 0x02: { /* BNEG.W */ + DIP("BNEG.W w%d, w%d, w%d", wd, ws, wt); + tmp |= (tmp << 32); + assign(t2, + binop(Iop_Shl32x4, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + getWReg(wt))); + break; + } + + case 0x03: { /* BNEG.D */ + DIP("BNEG.D w%d, w%d, w%d", wd, ws, wt); + assign(t2, + binop(Iop_Shl64x2, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + getWReg(wt))); + break; + } + } + + assign(t3, binop(Iop_XorV128, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x06: { /* BINSL.df */ + switch (df) { + case 0x00: { /* BINSL.B */ + DIP("BINSL.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = 0x8080808080808080ULL; + assign(t1, + binop(Iop_Sar8x16, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + getWReg(wt))); + assign(t2, + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t1)), + getWReg(wd))); + assign(t3, + binop(Iop_AndV128, + mkexpr(t1), getWReg(ws))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t2), mkexpr(t3))); + break; + } + + case 0x01: { /* BINSL.H */ + DIP("BINSL.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = 0x8000800080008000ULL; + assign(t1, + binop(Iop_Sar16x8, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + getWReg(wt))); + assign(t2, + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t1)), + getWReg(wd))); + assign(t3, + binop(Iop_AndV128, + mkexpr(t1), getWReg(ws))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t2), mkexpr(t3))); + break; + } + + case 0x02: { /* BINSL.W */ + DIP("BINSL.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = 0x8000000080000000ULL; + assign(t1, + binop(Iop_Sar32x4, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + getWReg(wt))); + assign(t2, + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t1)), + getWReg(wd))); + assign(t3, + binop(Iop_AndV128, + mkexpr(t1), getWReg(ws))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t2), mkexpr(t3))); + break; + } + + case 0x03: { /* BINSL.D */ + DIP("BINSL.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = 0x8000000000000000ULL; + assign(t1, + binop(Iop_Sar64x2, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + getWReg(wt))); + assign(t2, + 
binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t1)), + getWReg(wd))); + assign(t3, + binop(Iop_AndV128, + mkexpr(t1), getWReg(ws))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t2), mkexpr(t3))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x07: { /* BINSR.df */ + switch (df) { + case 0x00: { /* BINSR.B */ + DIP("BINSR.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = 0xFEFEFEFEFEFEFEFEULL; + assign(t1, + binop(Iop_Shl8x16, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + getWReg(wt))); + assign(t2, + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t1)), + getWReg(ws))); + assign(t3, + binop(Iop_AndV128, + mkexpr(t1), getWReg(wd))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t2), mkexpr(t3))); + break; + } + + case 0x01: { /* BINSR.H */ + DIP("BINSR.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = 0xFFFEFFFEFFFEFFFEULL; + assign(t1, + binop(Iop_Shl16x8, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + getWReg(wt))); + assign(t2, + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t1)), + getWReg(ws))); + assign(t3, + binop(Iop_AndV128, + mkexpr(t1), getWReg(wd))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t2), mkexpr(t3))); + break; + } + + case 0x02: { /* BINSR.W */ + DIP("BINSR.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = 0xFFFFFFFEFFFFFFFEULL; + assign(t1, + binop(Iop_Shl32x4, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + getWReg(wt))); + assign(t2, + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t1)), + getWReg(ws))); + assign(t3, + binop(Iop_AndV128, + mkexpr(t1), getWReg(wd))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t2), mkexpr(t3))); + break; + } + + case 0x03: { /* BINSR.D */ + DIP("BINSR.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + ULong tmp = -2; + assign(t1, + binop(Iop_Shl64x2, + binop(Iop_64HLtoV128, + mkU64(tmp), mkU64(tmp)), + getWReg(wt))); + assign(t2, + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t1)), + getWReg(ws))); + assign(t3, + binop(Iop_AndV128, + mkexpr(t1), getWReg(wd))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t2), mkexpr(t3))); + break; + } + + default: + return -1; + } + + break; + } + + default: + return -1; + } + + return 0; +} + +static Int msa_3R_0E(UInt cins, UChar wd, UChar ws) { /* 3R (0x0E) */ + IRTemp t1, t2, t3, t4; + UShort operation; + UChar df, wt; + + operation = (cins & 0x03800000) >> 23; + df = (cins & 0x00600000) >> 21; + wt = (cins & 0x001F0000) >> 16; + + switch (operation) { + case 0x00: { /* ADDV.df */ + switch (df) { + case 0x00: { /* ADDV.B */ + DIP("ADDV.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Add8x16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* ADDV.H */ + DIP("ADDV.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Add16x8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* ADDV.W */ + DIP("ADDV.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + 
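+                        /* ADDV maps one-to-one onto the VEX lane-wise add, so no
+                           MSA-specific handling is needed on this path */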
assign(t3, binop(Iop_Add32x4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* ADDV.D */ + DIP("ADDV.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Add64x2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x01: { /* SUBV.df */ + switch (df) { + case 0x00: { /* SUBV.B */ + DIP("SUBV.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Sub8x16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* SUBV.H */ + DIP("SUBV.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Sub16x8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* SUBV.W */ + DIP("SUBV.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Sub32x4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* SUBV.D */ + DIP("SUBV.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Sub64x2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x02: { /* MAX_S.df */ + switch (df) { + case 0x00: { /* MAX_S.B */ + DIP("MAX_S.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Max8Sx16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* MAX_S.H */ + DIP("MAX_S.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Max16Sx8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* MAX_S.W */ + DIP("MAX_S.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Max32Sx4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* MAX_S.D */ + DIP("MAX_S.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Max64Sx2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x03: { /* MAX_U.df */ + switch (df) { + case 0x00: { /* MAX_U.B */ + DIP("MAX_U.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Max8Ux16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* MAX_U.H */ + DIP("MAX_U.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = 
newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Max16Ux8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* MAX_U.W */ + DIP("MAX_U.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Max32Ux4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* MAX_U.D */ + DIP("MAX_U.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Max64Ux2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x04: { /* MIN_S.df */ + switch (df) { + case 0x00: { /* MIN_S.B */ + DIP("MIN_S.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Min8Sx16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* MIN_S.H */ + DIP("MIN_S.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Min16Sx8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* MIN_S.W */ + DIP("MIN_S.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Min32Sx4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* MIN_S.D */ + DIP("MIN_S.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Min64Sx2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x05: { /* MIN_U.df */ + switch (df) { + case 0x00: { /* MIN_U.B */ + DIP("MIN_U.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Min8Ux16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* MIN_U.H */ + DIP("MIN_U.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Min16Ux8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* MIN_U.W */ + DIP("MIN_U.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Min32Ux4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* MIN_U.D */ + DIP("MIN_U.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Min64Ux2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x06: { /* MAX_A.df */ + switch (df) { + case 0x00: { /* MAX_A.B */ + DIP("MAX_A.B 
w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, unop(Iop_Abs8x16, getWReg(ws))); + assign(t2, unop(Iop_Abs8x16, getWReg(wt))); + assign(t4, binop(Iop_CmpGT8Ux16, mkexpr(t1), mkexpr(t2))); + assign(t3, binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t4), + getWReg(ws)), + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t4)), + getWReg(wt)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* MAX_A.H */ + DIP("MAX_A.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, unop(Iop_Abs16x8, getWReg(ws))); + assign(t2, unop(Iop_Abs16x8, getWReg(wt))); + assign(t4, binop(Iop_CmpGT16Ux8, mkexpr(t1), mkexpr(t2))); + assign(t3, binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t4), + getWReg(ws)), + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t4)), + getWReg(wt)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* MAX_A.W */ + DIP("MAX_A.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, unop(Iop_Abs32x4, getWReg(ws))); + assign(t2, unop(Iop_Abs32x4, getWReg(wt))); + assign(t4, binop(Iop_CmpGT32Ux4, mkexpr(t1), mkexpr(t2))); + assign(t3, binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t4), + getWReg(ws)), + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t4)), + getWReg(wt)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* MAX_A.D */ + DIP("MAX_A.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, unop(Iop_Abs64x2, getWReg(ws))); + assign(t2, unop(Iop_Abs64x2, getWReg(wt))); + assign(t4, binop(Iop_CmpGT64Ux2, mkexpr(t1), mkexpr(t2))); + assign(t3, binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t4), + getWReg(ws)), + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t4)), + getWReg(wt)))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x07: { /* MIN_A.df */ + switch (df) { + case 0x00: { /* MIN_A.B */ + DIP("MIN_A.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, unop(Iop_Abs8x16, getWReg(ws))); + assign(t2, unop(Iop_Abs8x16, getWReg(wt))); + assign(t4, binop(Iop_OrV128, + binop(Iop_CmpGT8Ux16, + mkexpr(t1), mkexpr(t2)), + binop(Iop_CmpEQ8x16, + mkexpr(t1), mkexpr(t2)))); + assign(t3, binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t4), + getWReg(wt)), + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t4)), + getWReg(ws)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* MIN_A.H */ + DIP("MIN_A.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, unop(Iop_Abs16x8, getWReg(ws))); + assign(t2, unop(Iop_Abs16x8, getWReg(wt))); + assign(t4, binop(Iop_OrV128, + binop(Iop_CmpGT16Ux8, + mkexpr(t1), mkexpr(t2)), + binop(Iop_CmpEQ16x8, + mkexpr(t1), mkexpr(t2)))); + assign(t3, binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t4), + getWReg(wt)), + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t4)), + getWReg(ws)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* MIN_A.W */ + DIP("MIN_A.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, 
unop(Iop_Abs32x4, getWReg(ws))); + assign(t2, unop(Iop_Abs32x4, getWReg(wt))); + assign(t4, binop(Iop_OrV128, + binop(Iop_CmpGT32Ux4, + mkexpr(t1), mkexpr(t2)), + binop(Iop_CmpEQ32x4, + mkexpr(t1), mkexpr(t2)))); + assign(t3, binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t4), + getWReg(wt)), + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t4)), + getWReg(ws)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* MIN_A.D */ + DIP("MIN_A.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, unop(Iop_Abs64x2, getWReg(ws))); + assign(t2, unop(Iop_Abs64x2, getWReg(wt))); + assign(t4, binop(Iop_OrV128, + binop(Iop_CmpGT64Ux2, + mkexpr(t1), mkexpr(t2)), + binop(Iop_CmpEQ64x2, + mkexpr(t1), mkexpr(t2)))); + assign(t3, binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t4), + getWReg(wt)), + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t4)), + getWReg(ws)))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + default: + return -1; + } + + return 0; +} + +static Int msa_3R_0F(UInt cins, UChar wd, UChar ws) { /* 3R (0x0F) */ + IRTemp t1, t2, t3; + UShort operation; + UChar df, wt; + + operation = (cins & 0x03800000) >> 23; + df = (cins & 0x00600000) >> 21; + wt = (cins & 0x001F0000) >> 16; + + switch (operation) { + case 0x00: { /* CEQ.df */ + switch (df) { + case 0x00: { /* CEQ.B */ + DIP("CEQ.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_CmpEQ8x16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* CEQ.H */ + DIP("CEQ.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_CmpEQ16x8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* CEQ.W */ + DIP("CEQ.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_CmpEQ32x4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* CEQ.D */ + DIP("CEQ.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_CmpEQ64x2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x02: { /* CLT_S.df */ + switch (df) { + case 0x00: { /* CLT_S.B */ + DIP("CLT_S.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_CmpGT8Sx16, mkexpr(t2), mkexpr(t1))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* CLT_S.H */ + DIP("CLT_S.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_CmpGT16Sx8, mkexpr(t2), mkexpr(t1))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* CLT_S.W */ + DIP("CLT_S.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, 
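+                        /* VEX has no lane-wise less-than, so ws < wt is
+                           computed as wt > ws */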
binop(Iop_CmpGT32Sx4, mkexpr(t2), mkexpr(t1))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* CLT_S.D */ + DIP("CLT_S.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_CmpGT64Sx2, mkexpr(t2), mkexpr(t1))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x03: { /* CLT_U.df */ + switch (df) { + case 0x00: { /* CLT_U.B */ + DIP("CLT_U.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_CmpGT8Ux16, mkexpr(t2), mkexpr(t1))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* CLT_U.H */ + DIP("CLT_U.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_CmpGT16Ux8, mkexpr(t2), mkexpr(t1))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* CLT_U.W */ + DIP("CLT_U.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_CmpGT32Ux4, mkexpr(t2), mkexpr(t1))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* CLT_U.D */ + DIP("CLT_U.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_CmpGT64Ux2, mkexpr(t2), mkexpr(t1))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x04: { /* CLE_S.df */ + switch (df) { + case 0x00: { /* CLE_S.B */ + DIP("CLE_S.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_OrV128, + binop(Iop_CmpGT8Sx16, + mkexpr(t2), mkexpr(t1)), + binop(Iop_CmpEQ8x16, + mkexpr(t1), mkexpr(t2)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* CLE_S.H */ + DIP("CLE_S.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_OrV128, + binop(Iop_CmpGT16Sx8, + mkexpr(t2), mkexpr(t1)), + binop(Iop_CmpEQ16x8, + mkexpr(t1), mkexpr(t2)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* CLE_S.W */ + DIP("CLE_S.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_OrV128, + binop(Iop_CmpGT32Sx4, + mkexpr(t2), mkexpr(t1)), + binop(Iop_CmpEQ32x4, + mkexpr(t1), mkexpr(t2)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* CLE_S.D */ + DIP("CLE_S.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_OrV128, + binop(Iop_CmpGT64Sx2, + mkexpr(t2), mkexpr(t1)), + binop(Iop_CmpEQ64x2, + mkexpr(t1), mkexpr(t2)))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x05: { /* CLE_U.df */ + switch (df) { + case 0x00: { /* CLE_U.B */ + DIP("CLE_U.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + 
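+                        /* unsigned <= is synthesised as (wt >u ws) | (ws == wt),
+                           since VEX only provides CmpGT and CmpEQ */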
t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_OrV128, + binop(Iop_CmpGT8Ux16, + mkexpr(t2), mkexpr(t1)), + binop(Iop_CmpEQ8x16, + mkexpr(t1), mkexpr(t2)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* CLE_U.H */ + DIP("CLE_U.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_OrV128, + binop(Iop_CmpGT16Ux8, + mkexpr(t2), mkexpr(t1)), + binop(Iop_CmpEQ16x8, + mkexpr(t1), mkexpr(t2)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* CLE_U.W */ + DIP("CLE_U.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_OrV128, + binop(Iop_CmpGT32Ux4, + mkexpr(t2), mkexpr(t1)), + binop(Iop_CmpEQ32x4, + mkexpr(t1), mkexpr(t2)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* CLE_U.D */ + DIP("CLE_U.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_OrV128, + binop(Iop_CmpGT64Ux2, + mkexpr(t2), mkexpr(t1)), + binop(Iop_CmpEQ64x2, + mkexpr(t1), mkexpr(t2)))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + default: + return -1; + } + + return 0; +} + +static Int msa_3R_10(UInt cins, UChar wd, UChar ws) { /* 3R (0x10) */ + IRTemp t1, t2, t3, t4; + UShort operation; + UChar df, wt; + + operation = (cins & 0x03800000) >> 23; + df = (cins & 0x00600000) >> 21; + wt = (cins & 0x001F0000) >> 16; + + switch (operation) { + case 0x00: { /* ADD_A.df */ + switch (df) { + case 0x00: { /* ADD_A.B */ + DIP("ADD_A.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, unop(Iop_Abs8x16, getWReg(ws))); + assign(t2, unop(Iop_Abs8x16, getWReg(wt))); + assign(t3, binop(Iop_Add8x16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* ADD_A.H */ + DIP("ADD_A.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, unop(Iop_Abs16x8, getWReg(ws))); + assign(t2, unop(Iop_Abs16x8, getWReg(wt))); + assign(t3, binop(Iop_Add16x8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* ADD_A.W */ + DIP("ADD_A.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, unop(Iop_Abs32x4, getWReg(ws))); + assign(t2, unop(Iop_Abs32x4, getWReg(wt))); + assign(t3, binop(Iop_Add32x4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* ADD_A.D */ + DIP("ADD_A.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, unop(Iop_Abs64x2, getWReg(ws))); + assign(t2, unop(Iop_Abs64x2, getWReg(wt))); + assign(t3, binop(Iop_Add64x2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x01: { /* ADDS_A.df */ + switch (df) { + case 0x00: { /* ADDS_A.B */ + DIP("ADDS_A.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, unop(Iop_Abs8x16, getWReg(ws))); + assign(t2, unop(Iop_Abs8x16, getWReg(wt))); + assign(t3, 
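+                        /* t3 marks lanes where ws is -128: Abs leaves 0x80
+                           unchanged, so |ws| & ws keeps the sign bit set only
+                           there; such lanes are remapped to 127 below, because
+                           |INT8_MIN| is not representable */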
binop(Iop_SarN8x16, + binop(Iop_AndV128, + mkexpr(t1), + getWReg(ws)), + mkU8(7))); + assign(t4, binop(Iop_SarN8x16, + binop(Iop_AndV128, + mkexpr(t2), + getWReg(wt)), + mkU8(7))); + putWReg(wd, binop(Iop_QAdd8Sx16, + binop(Iop_OrV128, + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t3)), + mkexpr(t1)), + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t1)), + mkexpr(t3))), + binop(Iop_OrV128, + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t4)), + mkexpr(t2)), + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t2)), + mkexpr(t4))))); + break; + } + + case 0x01: { /* ADDS_A.H */ + DIP("ADDS_A.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, unop(Iop_Abs16x8, getWReg(ws))); + assign(t2, unop(Iop_Abs16x8, getWReg(wt))); + assign(t3, binop(Iop_SarN16x8, + binop(Iop_AndV128, + mkexpr(t1), + getWReg(ws)), + mkU8(15))); + assign(t4, binop(Iop_SarN16x8, + binop(Iop_AndV128, + mkexpr(t2), + getWReg(wt)), + mkU8(15))); + putWReg(wd, binop(Iop_QAdd16Sx8, + binop(Iop_OrV128, + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t3)), + mkexpr(t1)), + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t1)), + mkexpr(t3))), + binop(Iop_OrV128, + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t4)), + mkexpr(t2)), + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t2)), + mkexpr(t4))))); + break; + } + + case 0x02: { /* ADDS_A.W */ + DIP("ADDS_A.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, unop(Iop_Abs32x4, getWReg(ws))); + assign(t2, unop(Iop_Abs32x4, getWReg(wt))); + assign(t3, binop(Iop_SarN32x4, + binop(Iop_AndV128, + mkexpr(t1), + getWReg(ws)), + mkU8(31))); + assign(t4, binop(Iop_SarN32x4, + binop(Iop_AndV128, + mkexpr(t2), + getWReg(wt)), + mkU8(31))); + putWReg(wd, binop(Iop_QAdd32Sx4, + binop(Iop_OrV128, + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t3)), + mkexpr(t1)), + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t1)), + mkexpr(t3))), + binop(Iop_OrV128, + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t4)), + mkexpr(t2)), + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t2)), + mkexpr(t4))))); + break; + } + + case 0x03: { /* ADDS_A.D */ + DIP("ADDS_A.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, unop(Iop_Abs64x2, getWReg(ws))); + assign(t2, unop(Iop_Abs64x2, getWReg(wt))); + assign(t3, binop(Iop_SarN64x2, + binop(Iop_AndV128, + mkexpr(t1), + getWReg(ws)), + mkU8(63))); + assign(t4, binop(Iop_SarN64x2, + binop(Iop_AndV128, + mkexpr(t2), + getWReg(wt)), + mkU8(63))); + putWReg(wd, + binop(Iop_QAdd64Sx2, + binop(Iop_OrV128, + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t3)), + mkexpr(t1)), + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t1)), + mkexpr(t3))), + binop(Iop_OrV128, + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t4)), + mkexpr(t2)), + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t2)), + mkexpr(t4))))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x02: { /* ADDS_S.df */ + switch (df) { + case 0x00: { /* ADDS_S.B */ + DIP("ADDS_S.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_QAdd8Sx16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* ADDS_S.H */ + DIP("ADDS_S.H w%d, w%d, 
w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_QAdd16Sx8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* ADDS_S.W */ + DIP("ADDS_S.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_QAdd32Sx4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* ADDS_S.D */ + DIP("ADDS_S.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_QAdd64Sx2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x03: { /* ADDS_U.df */ + switch (df) { + case 0x00: { /* ADDS_U.B */ + DIP("ADDS_U.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_QAdd8Ux16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* ADDS_U.H */ + DIP("ADDS_U.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_QAdd16Ux8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* ADDS_U.W */ + DIP("ADDS_U.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_QAdd32Ux4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* ADDS_U.D */ + DIP("ADDS_U.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_QAdd64Ux2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x04: { /* AVE_S.df */ + switch (df) { + case 0x00: { /* AVE_S.B */ + DIP("AVE_S.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Add8x16, + binop(Iop_Add8x16, + binop(Iop_SarN8x16, + mkexpr(t1), mkU8(1)), + binop(Iop_SarN8x16, + mkexpr(t2), mkU8(1))), + binop(Iop_ShrN8x16, + binop(Iop_ShlN8x16, + binop(Iop_AndV128, + mkexpr(t1), + mkexpr(t2)), + mkU8(7)), + mkU8(7)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* AVE_S.H */ + DIP("AVE_S.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_Add16x8, + binop(Iop_Add16x8, + binop(Iop_SarN16x8, + mkexpr(t1), mkU8(1)), + binop(Iop_SarN16x8, + mkexpr(t2), mkU8(1))), + binop(Iop_ShrN16x8, + binop(Iop_ShlN16x8, + binop(Iop_AndV128, + mkexpr(t1), + mkexpr(t2)), + mkU8(15)), + mkU8(15)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* AVE_S.W */ + DIP("AVE_S.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, 
binop(Iop_Add32x4, + binop(Iop_Add32x4, + binop(Iop_SarN32x4, + mkexpr(t1), mkU8(1)), + binop(Iop_SarN32x4, + mkexpr(t2), mkU8(1))), + binop(Iop_ShrN32x4, + binop(Iop_ShlN32x4, + binop(Iop_AndV128, + mkexpr(t1), + mkexpr(t2)), + mkU8(31)), + mkU8(31)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* AVE_S.D */ + DIP("AVE_S.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Add64x2, + binop(Iop_Add64x2, + binop(Iop_SarN64x2, + mkexpr(t1), mkU8(1)), + binop(Iop_SarN64x2, + mkexpr(t2), mkU8(1))), + binop(Iop_ShrN64x2, + binop(Iop_ShlN64x2, + binop(Iop_AndV128, + mkexpr(t1), + mkexpr(t2)), + mkU8(63)), + mkU8(63)))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x05: { /* AVE_U.df */ + /* Unsigned truncated average without widening: (a >> 1) + (b >> 1) + (a & b & 1); the last term restores the carry lost when both low bits are set. The AVE_S.df cases above use the same identity with arithmetic shifts. */ + switch (df) { + case 0x00: { /* AVE_U.B */ + DIP("AVE_U.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Add8x16, + binop(Iop_Add8x16, + binop(Iop_ShrN8x16, + mkexpr(t1), mkU8(1)), + binop(Iop_ShrN8x16, + mkexpr(t2), mkU8(1))), + binop(Iop_ShrN8x16, + binop(Iop_ShlN8x16, + binop(Iop_AndV128, + mkexpr(t1), + mkexpr(t2)), + mkU8(7)), + mkU8(7)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* AVE_U.H */ + DIP("AVE_U.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Add16x8, + binop(Iop_Add16x8, + binop(Iop_ShrN16x8, + mkexpr(t1), mkU8(1)), + binop(Iop_ShrN16x8, + mkexpr(t2), mkU8(1))), + binop(Iop_ShrN16x8, + binop(Iop_ShlN16x8, + binop(Iop_AndV128, + mkexpr(t1), + mkexpr(t2)), + mkU8(15)), + mkU8(15)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* AVE_U.W */ + DIP("AVE_U.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Add32x4, + binop(Iop_Add32x4, + binop(Iop_ShrN32x4, + mkexpr(t1), mkU8(1)), + binop(Iop_ShrN32x4, + mkexpr(t2), mkU8(1))), + binop(Iop_ShrN32x4, + binop(Iop_ShlN32x4, + binop(Iop_AndV128, + mkexpr(t1), + mkexpr(t2)), + mkU8(31)), + mkU8(31)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* AVE_U.D */ + DIP("AVE_U.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Add64x2, + binop(Iop_Add64x2, + binop(Iop_ShrN64x2, + mkexpr(t1), mkU8(1)), + binop(Iop_ShrN64x2, + mkexpr(t2), mkU8(1))), + binop(Iop_ShrN64x2, + binop(Iop_ShlN64x2, + binop(Iop_AndV128, + mkexpr(t1), + mkexpr(t2)), + mkU8(63)), + mkU8(63)))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + }
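+ + /* AVER is the rounding variant: the Iop_Avg* lane ops already round, and the .D cases (which have no 64x2 Avg IOP) fall back to (a >> 1) + (b >> 1) + ((a | b) & 1), rounding an odd sum up where AVE's (a & b & 1) term truncates. */ + case 0x06: { /* AVER_S.df */ + switch (df) { + case 0x00: { /* AVER_S.B */ + DIP("AVER_S.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Avg8Sx16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* AVER_S.H */ + DIP("AVER_S.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2,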
getWReg(wt)); + assign(t3, binop(Iop_Avg16Sx8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* AVER_S.W */ + DIP("AVER_S.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Avg32Sx4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* AVER_S.D */ + DIP("AVER_S.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Add64x2, + binop(Iop_Add64x2, + binop(Iop_SarN64x2, + mkexpr(t1), mkU8(1)), + binop(Iop_SarN64x2, + mkexpr(t2), mkU8(1))), + binop(Iop_ShrN64x2, + binop(Iop_ShlN64x2, + binop(Iop_OrV128, + mkexpr(t1), + mkexpr(t2)), + mkU8(63)), + mkU8(63)))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x07: { /* AVER_U.df */ + switch (df) { + case 0x00: { /* AVER_U.B */ + DIP("AVER_U.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Avg8Ux16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* AVER_U.H */ + DIP("AVER_U.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Avg16Ux8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* AVER_U.W */ + DIP("AVER_U.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Avg32Ux4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* AVER_U.D */ + DIP("AVER_U.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_Add64x2, + binop(Iop_Add64x2, + binop(Iop_ShrN64x2, + mkexpr(t1), mkU8(1)), + binop(Iop_ShrN64x2, + mkexpr(t2), mkU8(1))), + binop(Iop_ShrN64x2, + binop(Iop_ShlN64x2, + binop(Iop_OrV128, + mkexpr(t1), + mkexpr(t2)), + mkU8(63)), + mkU8(63)))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + default: + return -1; + } + + return 0; +} + +static Int msa_3R_11(UInt cins, UChar wd, UChar ws) { /* 3R (0x11) */ + IRTemp t1, t2, t3; + UShort operation; + UChar df, wt; + + operation = (cins & 0x03800000) >> 23; + df = (cins & 0x00600000) >> 21; + wt = (cins & 0x001F0000) >> 16; + + switch (operation) { + case 0x00: { /* SUBS_S.df */ + switch (df) { + case 0x00: { /* SUBS_S.B */ + DIP("SUBS_S.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_QSub8Sx16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* SUBS_S.H */ + DIP("SUBS_S.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_QSub16Sx8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* SUBS_S.W */ + DIP("SUBS_S.W w%d, w%d, w%d", 
wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_QSub32Sx4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* SUBS_S.D */ + DIP("SUBS_S.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_QSub64Sx2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x01: { /* SUBS_U.df */ + switch (df) { + case 0x00: { /* SUBS_U.B */ + DIP("SUBS_U.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_QSub8Ux16, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* SUBS_U.H */ + DIP("SUBS_U.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_QSub16Ux8, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* SUBS_U.W */ + DIP("SUBS_U.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_QSub32Ux4, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* SUBS_U.D */ + DIP("SUBS_U.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_QSub64Ux2, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x02: { /* SUBSUS_U.df */ + switch (df) { + case 0x00: { /* SUBSUS_U.B */ + DIP("SUBSUS_U.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, binop(Iop_Sub8x16, getWReg(ws), getWReg(wt))); + assign(t2, binop(Iop_SarN8x16, getWReg(wt), mkU8(7))); + assign(t3, binop(Iop_OrV128, + binop(Iop_CmpGT8Ux16, + getWReg(ws), + getWReg(wt)), + binop(Iop_CmpEQ8x16, + getWReg(ws), + getWReg(wt)))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t3), mkexpr(t2)), + binop(Iop_AndV128, + mkexpr(t1), + binop(Iop_XorV128, + mkexpr(t3), + mkexpr(t2))))); + break; + } + + case 0x01: { /* SUBSUS_U.H */ + DIP("SUBSUS_U.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, binop(Iop_Sub16x8, getWReg(ws), getWReg(wt))); + assign(t2, binop(Iop_SarN16x8, getWReg(wt), mkU8(15))); + assign(t3, + binop(Iop_OrV128, + binop(Iop_CmpGT16Ux8, + getWReg(ws), + getWReg(wt)), + binop(Iop_CmpEQ16x8, + getWReg(ws), + getWReg(wt)))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t3), mkexpr(t2)), + binop(Iop_AndV128, + mkexpr(t1), + binop(Iop_XorV128, + mkexpr(t3), + mkexpr(t2))))); + break; + } + + case 0x02: { /* SUBSUS_U.W */ + DIP("SUBSUS_U.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, binop(Iop_Sub32x4, getWReg(ws), getWReg(wt))); + assign(t2, binop(Iop_SarN32x4, getWReg(wt), mkU8(31))); + assign(t3, + binop(Iop_OrV128, + binop(Iop_CmpGT32Ux4, + getWReg(ws), + getWReg(wt)), + 
binop(Iop_CmpEQ32x4, + getWReg(ws), + getWReg(wt)))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t3), mkexpr(t2)), + binop(Iop_AndV128, + mkexpr(t1), + binop(Iop_XorV128, + mkexpr(t3), + mkexpr(t2))))); + break; + } + + case 0x03: { /* SUBSUS_U.D */ + DIP("SUBSUS_U.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, binop(Iop_Sub64x2, getWReg(ws), getWReg(wt))); + assign(t2, binop(Iop_SarN64x2, getWReg(wt), mkU8(63))); + assign(t3, + binop(Iop_OrV128, + binop(Iop_CmpGT64Ux2, + getWReg(ws), + getWReg(wt)), + binop(Iop_CmpEQ64x2, + getWReg(ws), + getWReg(wt)))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t3), mkexpr(t2)), + binop(Iop_AndV128, + mkexpr(t1), + binop(Iop_XorV128, + mkexpr(t3), + mkexpr(t2))))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x03: { /* SUBSUU_S.df */ + switch (df) { + case 0x00: { /* SUBSUU_S.B */ + DIP("SUBSUU_S.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, binop(Iop_Sub8x16, getWReg(ws), getWReg(wt))); + assign(t2, + binop(Iop_SarN8x16, + binop (Iop_AndV128, + binop(Iop_XorV128, + getWReg(ws), + getWReg(wt)), + binop(Iop_XorV128, + mkexpr(t1), + getWReg(wt))), + mkU8(7))); + assign(t3, + binop(Iop_AndV128, + binop(Iop_SarN8x16, + getWReg(ws), mkU8(7)), + mkexpr(t2))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t1), + unop(Iop_NotV128, + mkexpr(t2))), + binop(Iop_XorV128, + binop(Iop_ShlN8x16, + mkexpr(t2), mkU8(7)), + mkexpr(t3)))); + break; + } + + case 0x01: { /* SUBSUU_S.H */ + DIP("SUBSUU_S.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, binop(Iop_Sub16x8, getWReg(ws), getWReg(wt))); + assign(t2, + binop(Iop_SarN16x8, + binop (Iop_AndV128, + binop(Iop_XorV128, + getWReg(ws), + getWReg(wt)), + binop(Iop_XorV128, + mkexpr(t1), + getWReg(wt))), + mkU8(15))); + assign(t3, + binop(Iop_AndV128, + binop(Iop_SarN16x8, + getWReg(ws), + mkU8(15)), + mkexpr(t2))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t1), + unop(Iop_NotV128, + mkexpr(t2))), + binop(Iop_XorV128, + binop(Iop_ShlN16x8, + mkexpr(t2), mkU8(15)), + mkexpr(t3)))); + break; + } + + case 0x02: { /* SUBSUU_S.W */ + DIP("SUBSUU_S.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, binop(Iop_Sub32x4, getWReg(ws), getWReg(wt))); + assign(t2, + binop(Iop_SarN32x4, + binop (Iop_AndV128, + binop(Iop_XorV128, + getWReg(ws), + getWReg(wt)), + binop(Iop_XorV128, + mkexpr(t1), + getWReg(wt))), + mkU8(31))); + assign(t3, + binop(Iop_AndV128, + binop(Iop_SarN32x4, + getWReg(ws), + mkU8(31)), + mkexpr(t2))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t1), + unop(Iop_NotV128, + mkexpr(t2))), + binop(Iop_XorV128, + binop(Iop_ShlN32x4, + mkexpr(t2), + mkU8(31)), + mkexpr(t3)))); + break; + } + + case 0x03: { /* SUBSUU_S.D */ + DIP("SUBSUU_S.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, binop(Iop_Sub64x2, getWReg(ws), getWReg(wt))); + assign(t2, + binop(Iop_SarN64x2, + binop (Iop_AndV128, + binop(Iop_XorV128, + getWReg(ws), + getWReg(wt)), + binop(Iop_XorV128, + mkexpr(t1), + getWReg(wt))), + mkU8(63))); + assign(t3, + binop(Iop_AndV128, + binop(Iop_SarN64x2, + getWReg(ws), + mkU8(63)), + mkexpr(t2))); + putWReg(wd, + binop(Iop_OrV128, + 
binop(Iop_AndV128, + mkexpr(t1), + unop(Iop_NotV128, + mkexpr(t2))), + binop(Iop_XorV128, + binop(Iop_ShlN64x2, + mkexpr(t2), mkU8(63)), + mkexpr(t3)))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x04: { /* ASUB_S.df */ + switch (df) { + case 0x00: { /* ASUB_S.B */ + DIP("ASUB_S.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, binop(Iop_SarN8x16, getWReg(ws), mkU8(7))); + assign(t2, binop(Iop_SarN8x16, getWReg(wt), mkU8(7))); + assign(t3, binop(Iop_Sub8x16, getWReg(ws), getWReg(wt))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_OrV128, + binop(Iop_AndV128, + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t1)), + mkexpr(t2)), + mkexpr(t3)), + binop(Iop_AndV128, + unop(Iop_NotV128, + binop(Iop_XorV128, + mkexpr(t1), + mkexpr(t2))), + unop(Iop_Abs8x16, + mkexpr(t3)))), + binop(Iop_AndV128, + binop(Iop_AndV128, + mkexpr(t1), + unop(Iop_NotV128, + mkexpr(t2))), + binop(Iop_Sub8x16, + getWReg(wt), + getWReg(ws))))); + break; + } + + case 0x01: { /* ASUB_S.H */ + DIP("ASUB_S.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, binop(Iop_SarN16x8, getWReg(ws), mkU8(15))); + assign(t2, binop(Iop_SarN16x8, getWReg(wt), mkU8(15))); + assign(t3, binop(Iop_Sub16x8, getWReg(ws), getWReg(wt))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_OrV128, + binop(Iop_AndV128, + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t1)), + mkexpr(t2)), + mkexpr(t3)), + binop(Iop_AndV128, + unop(Iop_NotV128, + binop(Iop_XorV128, + mkexpr(t1), + mkexpr(t2))), + unop(Iop_Abs16x8, + mkexpr(t3)))), + binop(Iop_AndV128, + binop(Iop_AndV128, + mkexpr(t1), + unop(Iop_NotV128, + mkexpr(t2))), + binop(Iop_Sub16x8, + getWReg(wt), + getWReg(ws))))); + break; + } + + case 0x02: { /* ASUB_S.W */ + DIP("ASUB_S.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, binop(Iop_SarN32x4, getWReg(ws), mkU8(31))); + assign(t2, binop(Iop_SarN32x4, getWReg(wt), mkU8(31))); + assign(t3, binop(Iop_Sub32x4, getWReg(ws), getWReg(wt))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_OrV128, + binop(Iop_AndV128, + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t1)), + mkexpr(t2)), + mkexpr(t3)), + binop(Iop_AndV128, + unop(Iop_NotV128, + binop(Iop_XorV128, + mkexpr(t1), + mkexpr(t2))), + unop(Iop_Abs32x4, + mkexpr(t3)))), + binop(Iop_AndV128, + binop(Iop_AndV128, + mkexpr(t1), + unop(Iop_NotV128, + mkexpr(t2))), + binop(Iop_Sub32x4, + getWReg(wt), + getWReg(ws))))); + break; + } + + case 0x03: { /* ASUB_S.D */ + DIP("ASUB_S.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, binop(Iop_SarN64x2, getWReg(ws), mkU8(63))); + assign(t2, binop(Iop_SarN64x2, getWReg(wt), mkU8(63))); + assign(t3, binop(Iop_Sub64x2, getWReg(ws), getWReg(wt))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_OrV128, + binop(Iop_AndV128, + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t1)), + mkexpr(t2)), + mkexpr(t3)), + binop(Iop_AndV128, + unop(Iop_NotV128, + binop(Iop_XorV128, + mkexpr(t1), + mkexpr(t2))), + unop(Iop_Abs64x2, + mkexpr(t3)))), + binop(Iop_AndV128, + binop(Iop_AndV128, + mkexpr(t1), + unop(Iop_NotV128, + mkexpr(t2))), + binop(Iop_Sub64x2, + getWReg(wt), + getWReg(ws))))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x05: { /* ASUB_U.df */ + switch (df) { + case 0x00: { /* ASUB_U.B */ + DIP("ASUB_U.B w%d, w%d, w%d", wd, ws, wt); + t1 
= newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_SarN8x16, + binop(Iop_XorV128, + mkexpr(t1), mkexpr(t2)), + mkU8(7))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t3)), + unop(Iop_Abs8x16, + binop(Iop_Sub8x16, + mkexpr(t1), + mkexpr(t2)))), + binop(Iop_AndV128, mkexpr(t3), + binop(Iop_Sub8x16, + binop(Iop_Max8Ux16, + mkexpr(t1), + mkexpr(t2)), + binop(Iop_Min8Ux16, + mkexpr(t1), + mkexpr(t2)))))); + break; + } + + case 0x01: { /* ASUB_U.H */ + DIP("ASUB_U.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_SarN16x8, + binop(Iop_XorV128, + mkexpr(t1), mkexpr(t2)), + mkU8(15))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t3)), + unop(Iop_Abs16x8, + binop(Iop_Sub16x8, + mkexpr(t1), + mkexpr(t2)))), + binop(Iop_AndV128, + mkexpr(t3), + binop(Iop_Sub16x8, + binop(Iop_Max16Ux8, + mkexpr(t1), + mkexpr(t2)), + binop(Iop_Min16Ux8, + mkexpr(t1), + mkexpr(t2)))))); + break; + } + + case 0x02: { /* ASUB_U.W */ + DIP("ASUB_U.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_SarN32x4, + binop(Iop_XorV128, + mkexpr(t1), mkexpr(t2)), + mkU8(31))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t3)), + unop(Iop_Abs32x4, + binop(Iop_Sub32x4, + mkexpr(t1), + mkexpr(t2)))), + binop(Iop_AndV128, + mkexpr(t3), + binop(Iop_Sub32x4, + binop(Iop_Max32Ux4, + mkexpr(t1), + mkexpr(t2)), + binop(Iop_Min32Ux4, + mkexpr(t1), + mkexpr(t2)))))); + break; + } + + case 0x03: { /* ASUB_U.D */ + DIP("ASUB_U.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_SarN64x2, + binop(Iop_XorV128, + mkexpr(t1), mkexpr(t2)), + mkU8(63))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t3)), + unop(Iop_Abs64x2, + binop(Iop_Sub64x2, + mkexpr(t1), + mkexpr(t2)))), + binop(Iop_AndV128, + mkexpr(t3), + binop(Iop_Sub64x2, + binop(Iop_Max64Ux2, + mkexpr(t1), + mkexpr(t2)), + binop(Iop_Min64Ux2, + mkexpr(t1), + mkexpr(t2)))))); + break; + } + + default: + return -1; + } + + break; + } + + default: + return -1; + } + + return 0; +} + +static Int msa_3R_12(UInt cins, UChar wd, UChar ws) { /* 3R (0x12) */ + IRTemp t1, t2, t3, t4, t5, t6; + UShort operation; + UChar df, wt; + + operation = (cins & 0x03800000) >> 23; + df = (cins & 0x00600000) >> 21; + wt = (cins & 0x001F0000) >> 16; + + switch (operation) { + case 0x00: { /* MULV.df */ + switch (df) { + case 0x00: { /* MULV.B */ + DIP("MULV.B w%d, w%d, w%d", wd, ws, wt); + putWReg(wd, binop(Iop_Mul8x16, getWReg(ws), getWReg(wt))); + break; + } + + case 0x01: { /* MULV.H */ + DIP("MULV.H w%d, w%d, w%d", wd, ws, wt); + putWReg(wd, binop(Iop_Mul16x8, getWReg(ws), getWReg(wt))); + break; + } + + case 0x02: { /* MULV.W */ + DIP("MULV.W w%d, w%d, w%d", wd, ws, wt); + putWReg(wd, binop(Iop_Mul32x4, getWReg(ws), getWReg(wt))); + break; + } + + case 0x03: { /* MULV.D */ + DIP("MULV.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + putWReg(wd, + binop(Iop_64HLtoV128, + 
binop(Iop_Mul64, + unop(Iop_V128HIto64, + mkexpr(t1)), + unop(Iop_V128HIto64, + mkexpr(t2))), + binop(Iop_Mul64, + unop(Iop_V128to64, + mkexpr(t1)), + unop(Iop_V128to64, + mkexpr(t2))))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x01: { /* MADDV.df */ + switch (df) { + case 0x00: { /* MADDV.B */ + DIP("MADDV.B w%d, w%d, w%d", wd, ws, wt); + putWReg(wd, + binop(Iop_Add8x16, + getWReg(wd), + binop(Iop_Mul8x16, + getWReg(ws), + getWReg(wt)))); + break; + } + + case 0x01: { /* MADDV.H */ + DIP("MADDV.H w%d, w%d, w%d", wd, ws, wt); + putWReg(wd, + binop(Iop_Add16x8, + getWReg(wd), + binop(Iop_Mul16x8, + getWReg(ws), + getWReg(wt)))); + break; + } + + case 0x02: { /* MADDV.W */ + DIP("MADDV.W w%d, w%d, w%d", wd, ws, wt); + putWReg(wd, + binop(Iop_Add32x4, + getWReg(wd), + binop(Iop_Mul32x4, + getWReg(ws), + getWReg(wt)))); + break; + } + + case 0x03: { /* MADDV.D */ + DIP("MADDV.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + putWReg(wd, + binop(Iop_Add64x2, + getWReg(wd), + binop(Iop_64HLtoV128, + binop(Iop_Mul64, + unop(Iop_V128HIto64, + mkexpr(t1)), + unop(Iop_V128HIto64, + mkexpr(t2))), + binop(Iop_Mul64, + unop(Iop_V128to64, + mkexpr(t1)), + unop(Iop_V128to64, + mkexpr(t2)))))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x02: { /* MSUBV.df */ + switch (df) { + case 0x00: { /* MSUBV.B */ + DIP("MSUBV.B w%d, w%d, w%d", wd, ws, wt); + putWReg(wd, + binop(Iop_Sub8x16, + getWReg(wd), + binop(Iop_Mul8x16, + getWReg(ws), + getWReg(wt)))); + break; + } + + case 0x01: { /* MSUBV.H */ + DIP("MSUBV.H w%d, w%d, w%d", wd, ws, wt); + putWReg(wd, + binop(Iop_Sub16x8, + getWReg(wd), + binop(Iop_Mul16x8, + getWReg(ws), + getWReg(wt)))); + break; + } + + case 0x02: { /* MSUBV.W */ + DIP("MSUBV.W w%d, w%d, w%d", wd, ws, wt); + putWReg(wd, + binop(Iop_Sub32x4, + getWReg(wd), + binop(Iop_Mul32x4, + getWReg(ws), + getWReg(wt)))); + break; + } + + case 0x03: { /* MSUBV.D */ + DIP("MSUBV.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + putWReg(wd, + binop(Iop_Sub64x2, + getWReg(wd), + binop(Iop_64HLtoV128, + binop(Iop_Mul64, + unop(Iop_V128HIto64, + mkexpr(t1)), + unop(Iop_V128HIto64, + mkexpr(t2))), + binop(Iop_Mul64, + unop(Iop_V128to64, + mkexpr(t1)), + unop(Iop_V128to64, + mkexpr(t2)))))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x04: { /* DIV_S.df */ + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + + switch (df) { + case 0x00: { /* DIV_S.B */ + DIP("DIV_S.B w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[16]; + Int i; + + for (i = 0; i < 16; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + binop(Iop_Shl32, + binop(Iop_And32, + mkU32(0xFF), + binop(Iop_DivS32, + unop(Iop_8Sto32, + binop(Iop_GetElem8x16, + mkexpr(t1), + mkU8(i))), + unop(Iop_8Sto32, + binop(Iop_GetElem8x16, + mkexpr(t2), + mkU8(i))))), + mkU8((i & 3) << 3))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_Or32, + mkexpr(tmp[15]), + binop(Iop_Or32, + mkexpr(tmp[14]), + binop(Iop_Or32, + mkexpr(tmp[13]), + mkexpr(tmp[12])))), + binop(Iop_Or32, + mkexpr(tmp[11]), + binop(Iop_Or32, + mkexpr(tmp[10]), + binop(Iop_Or32, + mkexpr(tmp[9]), + mkexpr(tmp[8]))))), + binop(Iop_32HLto64, + binop(Iop_Or32, + mkexpr(tmp[7]), + binop(Iop_Or32, + mkexpr(tmp[6]), + binop(Iop_Or32, + mkexpr(tmp[5]), + mkexpr(tmp[4])))), 
+ binop(Iop_Or32, + mkexpr(tmp[3]), + binop(Iop_Or32, + mkexpr(tmp[2]), + binop(Iop_Or32, + mkexpr(tmp[1]), + mkexpr(tmp[0])))))) + ); + break; + } + + case 0x01: { /* DIV_S.H */ + DIP("DIV_S.H w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[8]; + Int i; + + for (i = 0; i < 8; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + binop(Iop_Shl32, + binop(Iop_And32, + mkU32(0xFFFF), + binop(Iop_DivS32, + unop(Iop_16Sto32, + binop(Iop_GetElem16x8, + mkexpr(t1), + mkU8(i))), + unop(Iop_16Sto32, + binop(Iop_GetElem16x8, + mkexpr(t2), + mkU8(i))))), + mkU8((i & 1) << 4))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_Or32, + mkexpr(tmp[7]), + mkexpr(tmp[6])), + binop(Iop_Or32, + mkexpr(tmp[5]), + mkexpr(tmp[4]))), + binop(Iop_32HLto64, + binop(Iop_Or32, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_Or32, + mkexpr(tmp[1]), + mkexpr(tmp[0]))))); + break; + } + + case 0x02: { /* DIV_S.W */ + DIP("DIV_S.W w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[4]; + Int i; + + for (i = 0; i < 4; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + binop(Iop_DivS32, + binop(Iop_GetElem32x4, + mkexpr(t1), mkU8(i)), + binop(Iop_GetElem32x4, + mkexpr(t2), mkU8(i)))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_32HLto64, + mkexpr(tmp[1]), + mkexpr(tmp[0])))); + break; + } + + case 0x03: { /* DIV_S.D */ + DIP("DIV_S.D w%d, w%d, w%d", wd, ws, wt); + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_DivS64, + unop(Iop_V128HIto64, + mkexpr(t1)), + unop(Iop_V128HIto64, + mkexpr(t2))), + binop(Iop_DivS64, + unop(Iop_V128to64, + mkexpr(t1)), + unop(Iop_V128to64, + mkexpr(t2))))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x05: { /* DIV_U.df */ + /* As for DIV_S.df: the IR has no vector divide, so each lane is divided separately and the results are reassembled. */ + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + + switch (df) { + case 0x00: { /* DIV_U.B */ + DIP("DIV_U.B w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[16]; + Int i; + + for (i = 0; i < 16; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + binop(Iop_Shl32, + binop(Iop_And32, + mkU32(0xFF), + binop(Iop_DivU32, + unop(Iop_8Uto32, + binop(Iop_GetElem8x16, + mkexpr(t1), + mkU8(i))), + unop(Iop_8Uto32, + binop(Iop_GetElem8x16, + mkexpr(t2), + mkU8(i))))), + mkU8((i & 3) << 3))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_Or32, + mkexpr(tmp[15]), + binop(Iop_Or32, + mkexpr(tmp[14]), + binop(Iop_Or32, + mkexpr(tmp[13]), + mkexpr(tmp[12])))), + binop(Iop_Or32, + mkexpr(tmp[11]), + binop(Iop_Or32, + mkexpr(tmp[10]), + binop(Iop_Or32, + mkexpr(tmp[9]), + mkexpr(tmp[8]))))), + binop(Iop_32HLto64, + binop(Iop_Or32, + mkexpr(tmp[7]), + binop(Iop_Or32, + mkexpr(tmp[6]), + binop(Iop_Or32, + mkexpr(tmp[5]), + mkexpr(tmp[4])))), + binop(Iop_Or32, + mkexpr(tmp[3]), + binop(Iop_Or32, + mkexpr(tmp[2]), + binop(Iop_Or32, + mkexpr(tmp[1]), + mkexpr(tmp[0])))))) + ); + break; + } + + case 0x01: { /* DIV_U.H */ + DIP("DIV_U.H w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[8]; + Int i; + + for (i = 0; i < 8; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + binop(Iop_Shl32, + binop(Iop_And32, + mkU32(0xFFFF), + binop(Iop_DivU32, + unop(Iop_16Uto32, + binop(Iop_GetElem16x8, + mkexpr(t1), + mkU8(i))), + unop(Iop_16Uto32, + binop(Iop_GetElem16x8, + mkexpr(t2), + mkU8(i))))), + mkU8((i & 1) << 4))); + }
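+ + /* Each tmp[i] was shifted into its position within a 32-bit word above, so pairs are OR-ed together and the words concatenated back into a vector. */ + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_Or32, + mkexpr(tmp[7]), + mkexpr(tmp[6])), + binop(Iop_Or32, + mkexpr(tmp[5]), + mkexpr(tmp[4]))), +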
binop(Iop_32HLto64, + binop(Iop_Or32, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_Or32, + mkexpr(tmp[1]), + mkexpr(tmp[0]))))); + break; + } + + case 0x02: { /* DIV_U.W */ + DIP("DIV_U.W w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[4]; + Int i; + + for (i = 0; i < 4; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + binop(Iop_DivU32, + binop(Iop_GetElem32x4, + mkexpr(t1), mkU8(i)), + binop(Iop_GetElem32x4, + mkexpr(t2), mkU8(i)))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_32HLto64, + mkexpr(tmp[1]), + mkexpr(tmp[0])))); + break; + } + + case 0x03: { /* DIV_U.D */ + DIP("DIV_U.D w%d, w%d, w%d", wd, ws, wt); + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_DivU64, + unop(Iop_V128HIto64, + mkexpr(t1)), + unop(Iop_V128HIto64, + mkexpr(t2))), + binop(Iop_DivU64, + unop(Iop_V128to64, + mkexpr(t1)), + unop(Iop_V128to64, + mkexpr(t2))))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x06: { /* MOD_S.df */ + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + + switch (df) { + case 0x00: { /* MOD_S.B */ + DIP("MOD_S.B w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[16]; + Int i; + + for (i = 0; i < 16; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + binop(Iop_Shl32, + binop(Iop_And32, + mkU32(0xFF), + unop(Iop_64HIto32, + binop(Iop_DivModS32to32, + unop(Iop_8Sto32, + binop(Iop_GetElem8x16, + mkexpr(t1), + mkU8(i))), + unop(Iop_8Sto32, + binop(Iop_GetElem8x16, + mkexpr(t2), + mkU8(i)))))), + mkU8((i & 3) << 3))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_Or32, + mkexpr(tmp[15]), + binop(Iop_Or32, + mkexpr(tmp[14]), + binop(Iop_Or32, + mkexpr(tmp[13]), + mkexpr(tmp[12])))), + binop(Iop_Or32, + mkexpr(tmp[11]), + binop(Iop_Or32, + mkexpr(tmp[10]), + binop(Iop_Or32, + mkexpr(tmp[9]), + mkexpr(tmp[8]))))), + binop(Iop_32HLto64, + binop(Iop_Or32, + mkexpr(tmp[7]), + binop(Iop_Or32, + mkexpr(tmp[6]), + binop(Iop_Or32, + mkexpr(tmp[5]), + mkexpr(tmp[4])))), + binop(Iop_Or32, + mkexpr(tmp[3]), + binop(Iop_Or32, + mkexpr(tmp[2]), + binop(Iop_Or32, + mkexpr(tmp[1]), + mkexpr(tmp[0]))))))); + break; + } + + case 0x01: { /* MOD_S.H */ + DIP("MOD_S.H w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[8]; + Int i; + + for (i = 0; i < 8; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + binop(Iop_Shl32, + binop(Iop_And32, + mkU32(0xFFFF), + unop(Iop_64HIto32, + binop(Iop_DivModS32to32, + unop(Iop_16Sto32, + binop(Iop_GetElem16x8, + mkexpr(t1), + mkU8(i))), + unop(Iop_16Sto32, + binop(Iop_GetElem16x8, + mkexpr(t2), + mkU8(i)))))), + mkU8((i & 1) << 4))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_Or32, + mkexpr(tmp[7]), + mkexpr(tmp[6])), + binop(Iop_Or32, + mkexpr(tmp[5]), + mkexpr(tmp[4]))), + binop(Iop_32HLto64, + binop(Iop_Or32, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_Or32, + mkexpr(tmp[1]), + mkexpr(tmp[0]))))); + break; + } + + case 0x02: { /* MOD_S.W */ + DIP("MOD_S.W w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[4]; + Int i; + + for (i = 0; i < 4; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + unop(Iop_64HIto32, + binop(Iop_DivModS32to32, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(i)), + binop(Iop_GetElem32x4, + mkexpr(t2), + mkU8(i))))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_32HLto64, + mkexpr(tmp[1]), + mkexpr(tmp[0])))); + break; + } + + case 0x03: { /* MOD_S.D */ + DIP("MOD_S.D w%d, w%d, 
w%d", wd, ws, wt); + t3 = newTemp(Ity_I64); + t4 = newTemp(Ity_I64); + t5 = newTemp(Ity_I64); + t6 = newTemp(Ity_I64); + assign(t3, unop(Iop_V128HIto64, mkexpr(t1))); + assign(t4, unop(Iop_V128HIto64, mkexpr(t2))); + assign(t5, unop(Iop_V128to64, mkexpr(t1))); + assign(t6, unop(Iop_V128to64, mkexpr(t2))); + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_Sub64, + mkexpr(t3), + binop(Iop_Mul64, + mkexpr(t4), + binop(Iop_DivS64, + mkexpr(t3), + mkexpr(t4)))), + binop(Iop_Sub64, + mkexpr(t5), + binop(Iop_Mul64, + mkexpr(t6), + binop(Iop_DivS64, + mkexpr(t5), + mkexpr(t6)))))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x07: { /* MOD_U.df */ + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + + switch (df) { + case 0x00: { /* MOD_U.B */ + DIP("MOD_U.B w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[16]; + Int i; + + for (i = 0; i < 16; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + binop(Iop_Shl32, + binop(Iop_And32, + mkU32(0xFF), + unop(Iop_64HIto32, + binop(Iop_DivModU32to32, + unop(Iop_8Uto32, + binop(Iop_GetElem8x16, + mkexpr(t1), + mkU8(i))), + unop(Iop_8Uto32, + binop(Iop_GetElem8x16, + mkexpr(t2), + mkU8(i)))))), + mkU8((i & 3) << 3))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_Or32, + mkexpr(tmp[15]), + binop(Iop_Or32, + mkexpr(tmp[14]), + binop(Iop_Or32, + mkexpr(tmp[13]), + mkexpr(tmp[12])))), + binop(Iop_Or32, + mkexpr(tmp[11]), + binop(Iop_Or32, + mkexpr(tmp[10]), + binop(Iop_Or32, + mkexpr(tmp[9]), + mkexpr(tmp[8]))))), + binop(Iop_32HLto64, + binop(Iop_Or32, + mkexpr(tmp[7]), + binop(Iop_Or32, + mkexpr(tmp[6]), + binop(Iop_Or32, + mkexpr(tmp[5]), + mkexpr(tmp[4])))), + binop(Iop_Or32, + mkexpr(tmp[3]), + binop(Iop_Or32, + mkexpr(tmp[2]), + binop(Iop_Or32, + mkexpr(tmp[1]), + mkexpr(tmp[0]))))))); + break; + } + + case 0x01: { /* MOD_U.H */ + DIP("MOD_U.H w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[8]; + Int i; + + for (i = 0; i < 8; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + binop(Iop_Shl32, + binop(Iop_And32, + mkU32(0xFFFF), + unop(Iop_64HIto32, + binop(Iop_DivModU32to32, + unop(Iop_16Uto32, + binop(Iop_GetElem16x8, + mkexpr(t1), + mkU8(i))), + unop(Iop_16Uto32, + binop(Iop_GetElem16x8, + mkexpr(t2), + mkU8(i)))))), + mkU8((i & 1) << 4))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_Or32, + mkexpr(tmp[7]), + mkexpr(tmp[6])), + binop(Iop_Or32, + mkexpr(tmp[5]), + mkexpr(tmp[4]))), + binop(Iop_32HLto64, + binop(Iop_Or32, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_Or32, + mkexpr(tmp[1]), + mkexpr(tmp[0]))))); + break; + } + + case 0x02: { /* MOD_U.W */ + DIP("MOD_U.W w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[4]; + Int i; + + for (i = 0; i < 4; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + unop(Iop_64HIto32, + binop(Iop_DivModU32to32, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(i)), + binop(Iop_GetElem32x4, + mkexpr(t2), + mkU8(i))))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_32HLto64, + mkexpr(tmp[1]), + mkexpr(tmp[0])))); + break; + } + + case 0x03: { /* MOD_U.D */ + DIP("MOD_U.D w%d, w%d, w%d", wd, ws, wt); + t3 = newTemp(Ity_I64); + t4 = newTemp(Ity_I64); + t5 = newTemp(Ity_I64); + t6 = newTemp(Ity_I64); + assign(t3, unop(Iop_V128HIto64, mkexpr(t1))); + assign(t4, unop(Iop_V128HIto64, mkexpr(t2))); + assign(t5, unop(Iop_V128to64, mkexpr(t1))); + assign(t6, unop(Iop_V128to64, mkexpr(t2))); + putWReg(wd, + binop(Iop_64HLtoV128, + 
binop(Iop_Sub64, + mkexpr(t3), + binop(Iop_Mul64, + mkexpr(t4), + binop(Iop_DivU64, + mkexpr(t3), + mkexpr(t4)))), + binop(Iop_Sub64, + mkexpr(t5), + binop(Iop_Mul64, + mkexpr(t6), + binop(Iop_DivU64, + mkexpr(t5), + mkexpr(t6)))))); + break; + } + + default: + return -1; + } + + break; + } + + default: + return -1; + } + + return 0; +} + +static Int msa_3R_13(UInt cins, UChar wd, UChar ws) { /* 3R (0x13) */ + IRTemp t1, t2; + UShort operation; + UChar df, wt; + + operation = (cins & 0x03800000) >> 23; + df = (cins & 0x00600000) >> 21; + wt = (cins & 0x001F0000) >> 16; + + switch (operation) { + case 0x00: { /* DOTP_S.df */ + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + + switch (df) { + case 0x01: { /* DOTP_S.H */ + DIP("DOTP_S.H w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[8]; + Int i; + + for (i = 0; i < 8; i++) { + tmp[i] = newTemp(Ity_I16); + assign(tmp[i], + binop(Iop_Add16, + binop(Iop_MullS8, + binop(Iop_GetElem8x16, + mkexpr(t1), + mkU8(2 * i)), + binop(Iop_GetElem8x16, + mkexpr(t2), + mkU8(2 * i))), + binop(Iop_MullS8, + binop(Iop_GetElem8x16, + mkexpr(t1), + mkU8(2 * i + 1)), + binop(Iop_GetElem8x16, + mkexpr(t2), + mkU8(2 * i + 1))))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_16HLto32, + mkexpr(tmp[7]), + mkexpr(tmp[6])), + binop(Iop_16HLto32, + mkexpr(tmp[5]), + mkexpr(tmp[4]))), + binop(Iop_32HLto64, + binop(Iop_16HLto32, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_16HLto32, + mkexpr(tmp[1]), + mkexpr(tmp[0]))))); + break; + } + + case 0x02: { /* DOTP_S.W */ + DIP("DOTP_S.W w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[4]; + Int i; + + for (i = 0; i < 4; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + binop(Iop_Add32, + binop(Iop_MullS16, + binop(Iop_GetElem16x8, + mkexpr(t1), + mkU8(2 * i)), + binop(Iop_GetElem16x8, + mkexpr(t2), + mkU8(2 * i))), + binop(Iop_MullS16, + binop(Iop_GetElem16x8, + mkexpr(t1), + mkU8(2 * i + 1)), + binop(Iop_GetElem16x8, + mkexpr(t2), + mkU8(2 * i + 1))))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_32HLto64, + mkexpr(tmp[1]), + mkexpr(tmp[0])))); + break; + } + + case 0x03: { /* DOTP_S.D */ + DIP("DOTP_S.D w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[2]; + Int i; + + for (i = 0; i < 2; i++) { + tmp[i] = newTemp(Ity_I64); + assign(tmp[i], + binop(Iop_Add64, + binop(Iop_MullS32, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(2 * i)), + binop(Iop_GetElem32x4, + mkexpr(t2), + mkU8(2 * i))), + binop(Iop_MullS32, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(2 * i + 1)), + binop(Iop_GetElem32x4, + mkexpr(t2), + mkU8(2 * i + 1))))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + mkexpr(tmp[1]), mkexpr(tmp[0]))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x01: { /* DOTP_U.df */ + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + + switch (df) { + case 0x01: { /* DOTP_U.H */ + DIP("DOTP_U.H w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[8]; + Int i; + + for (i = 0; i < 8; i++) { + tmp[i] = newTemp(Ity_I16); + assign(tmp[i], + binop(Iop_Add16, + binop(Iop_MullU8, + binop(Iop_GetElem8x16, + mkexpr(t1), + mkU8(2 * i)), + binop(Iop_GetElem8x16, + mkexpr(t2), + mkU8(2 * i))), + binop(Iop_MullU8, + binop(Iop_GetElem8x16, + mkexpr(t1), + mkU8(2 * i + 1)), + binop(Iop_GetElem8x16, + mkexpr(t2), + mkU8(2 * i + 1))))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_16HLto32, + mkexpr(tmp[7]), 
+ mkexpr(tmp[6])), + binop(Iop_16HLto32, + mkexpr(tmp[5]), + mkexpr(tmp[4]))), + binop(Iop_32HLto64, + binop(Iop_16HLto32, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_16HLto32, + mkexpr(tmp[1]), + mkexpr(tmp[0]))))); + break; + } + + case 0x02: { /* DOTP_U.W */ + DIP("DOTP_U.W w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[4]; + Int i; + + for (i = 0; i < 4; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + binop(Iop_Add32, + binop(Iop_MullU16, + binop(Iop_GetElem16x8, + mkexpr(t1), + mkU8(2 * i)), + binop(Iop_GetElem16x8, + mkexpr(t2), + mkU8(2 * i))), + binop(Iop_MullU16, + binop(Iop_GetElem16x8, + mkexpr(t1), + mkU8(2 * i + 1)), + binop(Iop_GetElem16x8, + mkexpr(t2), + mkU8(2 * i + 1))))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_32HLto64, + mkexpr(tmp[1]), + mkexpr(tmp[0])))); + break; + } + + case 0x03: { /* DOTP_U.D */ + DIP("DOTP_U.D w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[2]; + Int i; + + for (i = 0; i < 2; i++) { + tmp[i] = newTemp(Ity_I64); + assign(tmp[i], + binop(Iop_Add64, + binop(Iop_MullU32, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(2 * i)), + binop(Iop_GetElem32x4, + mkexpr(t2), + mkU8(2 * i))), + binop(Iop_MullU32, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(2 * i + 1)), + binop(Iop_GetElem32x4, + mkexpr(t2), + mkU8(2 * i + 1))))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + mkexpr(tmp[1]), mkexpr(tmp[0]))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x02: { /* DPADD_S.df */ + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + + switch (df) { + case 0x01: { /* DPADD_S.H */ + DIP("DPADD_S.H w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[8]; + Int i; + + for (i = 0; i < 8; i++) { + tmp[i] = newTemp(Ity_I16); + assign(tmp[i], + binop(Iop_Add16, + binop(Iop_MullS8, + binop(Iop_GetElem8x16, + mkexpr(t1), + mkU8(2 * i)), + binop(Iop_GetElem8x16, + mkexpr(t2), + mkU8(2 * i))), + binop(Iop_MullS8, + binop(Iop_GetElem8x16, + mkexpr(t1), + mkU8(2 * i + 1)), + binop(Iop_GetElem8x16, + mkexpr(t2), + mkU8(2 * i + 1))))); + } + + putWReg(wd, + binop(Iop_Add16x8, + getWReg(wd), + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_16HLto32, + mkexpr(tmp[7]), + mkexpr(tmp[6])), + binop(Iop_16HLto32, + mkexpr(tmp[5]), + mkexpr(tmp[4]))), + binop(Iop_32HLto64, + binop(Iop_16HLto32, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_16HLto32, + mkexpr(tmp[1]), + mkexpr(tmp[0])))))); + break; + } + + case 0x02: { /* DPADD_S.W */ + DIP("DPADD_S.W w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[4]; + Int i; + + for (i = 0; i < 4; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + binop(Iop_Add32, + binop(Iop_MullS16, + binop(Iop_GetElem16x8, + mkexpr(t1), + mkU8(2 * i)), + binop(Iop_GetElem16x8, + mkexpr(t2), + mkU8(2 * i))), + binop(Iop_MullS16, + binop(Iop_GetElem16x8, + mkexpr(t1), + mkU8(2 * i + 1)), + binop(Iop_GetElem16x8, + mkexpr(t2), + mkU8(2 * i + 1))))); + } + + putWReg(wd, + binop(Iop_Add32x4, + getWReg(wd), + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_32HLto64, + mkexpr(tmp[1]), + mkexpr(tmp[0]))))); + break; + } + + case 0x03: { /* DPADD_S.D */ + DIP("DPADD_S.D w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[2]; + Int i; + + for (i = 0; i < 2; i++) { + tmp[i] = newTemp(Ity_I64); + assign(tmp[i], + binop(Iop_Add64, + binop(Iop_MullS32, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(2 * i)), + binop(Iop_GetElem32x4, + mkexpr(t2), + mkU8(2 * i))), + binop(Iop_MullS32, + 
binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(2 * i + 1)), + binop(Iop_GetElem32x4, + mkexpr(t2), + mkU8(2 * i + 1))))); + } + + putWReg(wd, + binop(Iop_Add64x2, + getWReg(wd), + binop(Iop_64HLtoV128, + mkexpr(tmp[1]), + mkexpr(tmp[0])))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x03: { /* DPADD_U.df */ + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + + switch (df) { + case 0x01: { /* DPADD_U.H */ + DIP("DPADD_U.H w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[8]; + Int i; + + for (i = 0; i < 8; i++) { + tmp[i] = newTemp(Ity_I16); + assign(tmp[i], + binop(Iop_Add16, + binop(Iop_MullU8, + binop(Iop_GetElem8x16, + mkexpr(t1), + mkU8(2 * i)), + binop(Iop_GetElem8x16, + mkexpr(t2), + mkU8(2 * i))), + binop(Iop_MullU8, + binop(Iop_GetElem8x16, + mkexpr(t1), + mkU8(2 * i + 1)), + binop(Iop_GetElem8x16, + mkexpr(t2), + mkU8(2 * i + 1))))); + } + + putWReg(wd, + binop(Iop_Add16x8, + getWReg(wd), + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_16HLto32, + mkexpr(tmp[7]), + mkexpr(tmp[6])), + binop(Iop_16HLto32, + mkexpr(tmp[5]), + mkexpr(tmp[4]))), + binop(Iop_32HLto64, + binop(Iop_16HLto32, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_16HLto32, + mkexpr(tmp[1]), + mkexpr(tmp[0])))))); + break; + } + + case 0x02: { /* DPADD_U.W */ + DIP("DPADD_U.W w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[4]; + Int i; + + for (i = 0; i < 4; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + binop(Iop_Add32, + binop(Iop_MullU16, + binop(Iop_GetElem16x8, + mkexpr(t1), + mkU8(2 * i)), + binop(Iop_GetElem16x8, + mkexpr(t2), + mkU8(2 * i))), + binop(Iop_MullU16, + binop(Iop_GetElem16x8, + mkexpr(t1), + mkU8(2 * i + 1)), + binop(Iop_GetElem16x8, + mkexpr(t2), + mkU8(2 * i + 1))))); + } + + putWReg(wd, + binop(Iop_Add32x4, + getWReg(wd), + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_32HLto64, + mkexpr(tmp[1]), + mkexpr(tmp[0]))))); + break; + } + + case 0x03: { /* DPADD_U.D */ + DIP("DPADD_U.D w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[2]; + Int i; + + for (i = 0; i < 2; i++) { + tmp[i] = newTemp(Ity_I64); + assign(tmp[i], + binop(Iop_Add64, + binop(Iop_MullU32, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(2 * i)), + binop(Iop_GetElem32x4, + mkexpr(t2), + mkU8(2 * i))), + binop(Iop_MullU32, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(2 * i + 1)), + binop(Iop_GetElem32x4, + mkexpr(t2), + mkU8(2 * i + 1))))); + } + + putWReg(wd, + binop(Iop_Add64x2, + getWReg(wd), + binop(Iop_64HLtoV128, + mkexpr(tmp[1]), + mkexpr(tmp[0])))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x04: { /* DPSUB_S.df */ + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + + switch (df) { + case 0x01: { /* DPSUB_S.H */ + DIP("DPSUB_S.H w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[8]; + Int i; + + for (i = 0; i < 8; i++) { + tmp[i] = newTemp(Ity_I16); + assign(tmp[i], + binop(Iop_Add16, + binop(Iop_MullS8, + binop(Iop_GetElem8x16, + mkexpr(t1), + mkU8(2 * i)), + binop(Iop_GetElem8x16, + mkexpr(t2), + mkU8(2 * i))), + binop(Iop_MullS8, + binop(Iop_GetElem8x16, + mkexpr(t1), + mkU8(2 * i + 1)), + binop(Iop_GetElem8x16, + mkexpr(t2), + mkU8(2 * i + 1))))); + } + + putWReg(wd, + binop(Iop_Sub16x8, + getWReg(wd), + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_16HLto32, + mkexpr(tmp[7]), + mkexpr(tmp[6])), + binop(Iop_16HLto32, + mkexpr(tmp[5]), + mkexpr(tmp[4]))), + binop(Iop_32HLto64, + binop(Iop_16HLto32, + mkexpr(tmp[3]), + 
mkexpr(tmp[2])), + binop(Iop_16HLto32, + mkexpr(tmp[1]), + mkexpr(tmp[0])))))); + break; + } + + case 0x02: { /* DPSUB_S.W */ + DIP("DPSUB_S.W w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[4]; + Int i; + + for (i = 0; i < 4; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + binop(Iop_Add32, + binop(Iop_MullS16, + binop(Iop_GetElem16x8, + mkexpr(t1), + mkU8(2 * i)), + binop(Iop_GetElem16x8, + mkexpr(t2), + mkU8(2 * i))), + binop(Iop_MullS16, + binop(Iop_GetElem16x8, + mkexpr(t1), + mkU8(2 * i + 1)), + binop(Iop_GetElem16x8, + mkexpr(t2), + mkU8(2 * i + 1))))); + } + + putWReg(wd, + binop(Iop_Sub32x4, + getWReg(wd), + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_32HLto64, + mkexpr(tmp[1]), + mkexpr(tmp[0]))))); + break; + } + + case 0x03: { /* DPSUB_S.D */ + DIP("DPSUB_S.D w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[2]; + Int i; + + for (i = 0; i < 2; i++) { + tmp[i] = newTemp(Ity_I64); + assign(tmp[i], + binop(Iop_Add64, + binop(Iop_MullS32, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(2 * i)), + binop(Iop_GetElem32x4, + mkexpr(t2), + mkU8(2 * i))), + binop(Iop_MullS32, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(2 * i + 1)), + binop(Iop_GetElem32x4, + mkexpr(t2), + mkU8(2 * i + 1))))); + } + + putWReg(wd, + binop(Iop_Sub64x2, + getWReg(wd), + binop(Iop_64HLtoV128, + mkexpr(tmp[1]), + mkexpr(tmp[0])))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x05: { /* DPSUB_U.df */ + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + + switch (df) { + case 0x01: { /* DPSUB_U.H */ + DIP("DPSUB_U.H w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[8]; + Int i; + + for (i = 0; i < 8; i++) { + tmp[i] = newTemp(Ity_I16); + assign(tmp[i], + binop(Iop_Add16, + binop(Iop_MullU8, + binop(Iop_GetElem8x16, + mkexpr(t1), + mkU8(2 * i)), + binop(Iop_GetElem8x16, + mkexpr(t2), + mkU8(2 * i))), + binop(Iop_MullU8, + binop(Iop_GetElem8x16, + mkexpr(t1), + mkU8(2 * i + 1)), + binop(Iop_GetElem8x16, + mkexpr(t2), + mkU8(2 * i + 1))))); + } + + putWReg(wd, + binop(Iop_Sub16x8, + getWReg(wd), + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_16HLto32, + mkexpr(tmp[7]), + mkexpr(tmp[6])), + binop(Iop_16HLto32, + mkexpr(tmp[5]), + mkexpr(tmp[4]))), + binop(Iop_32HLto64, + binop(Iop_16HLto32, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_16HLto32, + mkexpr(tmp[1]), + mkexpr(tmp[0])))))); + break; + } + + case 0x02: { /* DPSUB_U.W */ + DIP("DPSUB_U.W w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[4]; + Int i; + + for (i = 0; i < 4; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + binop(Iop_Add32, + binop(Iop_MullU16, + binop(Iop_GetElem16x8, + mkexpr(t1), + mkU8(2 * i)), + binop(Iop_GetElem16x8, + mkexpr(t2), + mkU8(2 * i))), + binop(Iop_MullU16, + binop(Iop_GetElem16x8, + mkexpr(t1), + mkU8(2 * i + 1)), + binop(Iop_GetElem16x8, + mkexpr(t2), + mkU8(2 * i + 1))))); + } + + putWReg(wd, + binop(Iop_Sub32x4, + getWReg(wd), + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_32HLto64, + mkexpr(tmp[1]), + mkexpr(tmp[0]))))); + break; + } + + case 0x03: { /* DPSUB_U.D */ + DIP("DPSUB_U.D w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[2]; + Int i; + + for (i = 0; i < 2; i++) { + tmp[i] = newTemp(Ity_I64); + assign(tmp[i], + binop(Iop_Add64, + binop(Iop_MullU32, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(2 * i)), + binop(Iop_GetElem32x4, + mkexpr(t2), + mkU8(2 * i))), + binop(Iop_MullU32, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(2 * i + 1)), + 
binop(Iop_GetElem32x4, + mkexpr(t2), + mkU8(2 * i + 1))))); + } + + putWReg(wd, + binop(Iop_Sub64x2, + getWReg(wd), + binop(Iop_64HLtoV128, + mkexpr(tmp[1]), + mkexpr(tmp[0])))); + break; + } + + default: + return -1; + } + + break; + } + + default: + return -1; + } + + return 0; +} + +static Int msa_3R_14(UInt cins, UChar wd, UChar ws) { /* 3R (0x14) */ + IRTemp t1, t2, t3, t4; + IRType ty; + UShort operation; + UChar df, wt; + + operation = (cins & 0x03800000) >> 23; + df = (cins & 0x00600000) >> 21; + wt = (cins & 0x001F0000) >> 16; + ty = mode64 ? Ity_I64 : Ity_I32; + + switch (operation) { + case 0x00: { /* SLD.df */ + switch (df) { + case 0x00: { + DIP("SLD.B w%d, w%d[%d]", wd, ws, wt); + t1 = newTemp(Ity_I32); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, + binop(Iop_Shl32, + binop(Iop_And32, + mkNarrowTo32(ty, + getIReg(wt)), + mkU32(15)), + mkU8(3))); + assign(t2, + binop(Iop_ShrV128, + getWReg(ws), + unop(Iop_32to8, mkexpr(t1)))); + assign(t3, + binop(Iop_ShlV128, + getWReg(wd), + unop(Iop_32to8, + binop(Iop_Sub32, + mkU32(128), + mkexpr(t1))))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t2), mkexpr(t3))); + break; + } + + case 0x01: {/* SLD.H */ + DIP("SLD.H w%d, w%d[%d]", wd, ws, wt); + t1 = newTemp(Ity_I32); + t2 = newTemp(Ity_I64); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, + binop(Iop_Shl32, + binop(Iop_And32, + mkNarrowTo32(ty, + getIReg(wt)), + mkU32(7)), + mkU8(3))); + assign(t2, + binop(Iop_32HLto64, mkU32(0), mkexpr(t1))); + assign(t3, + binop(Iop_Shr64x2, + getWReg(ws), + binop(Iop_64HLtoV128, + mkexpr(t2), mkexpr(t2)))); + assign(t4, + binop(Iop_Shl64x2, + getWReg(wd), + binop(Iop_Sub64x2, + binop(Iop_64HLtoV128, + mkU64(0x40ul), + mkU64(0x40ul)), + binop(Iop_64HLtoV128, + mkexpr(t2), + mkexpr(t2))))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t3), + IRExpr_ITE( + binop(Iop_CmpNE32, + mkexpr(t1), mkU32(0)), + mkexpr(t4), + binop(Iop_64HLtoV128, + mkU64(0), mkU64(0))))); + break; + } + + case 0x02: {/* SLD.W */ + DIP("SLD.W w%d, w%d[%d]", wd, ws, wt); + t1 = newTemp(Ity_I32); + t2 = newTemp(Ity_I64); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, + binop(Iop_Shl32, + binop(Iop_And32, + mkNarrowTo32(ty, + getIReg(wt)), + mkU32(3)), + mkU8(3))); + assign(t2, + binop(Iop_32HLto64, + mkexpr(t1), mkexpr(t1))); + assign(t3, + binop(Iop_Shr32x4, + getWReg(ws), + binop(Iop_64HLtoV128, + mkexpr(t2), mkexpr(t2)))); + assign(t4, + binop(Iop_Shl32x4, + getWReg(wd), + binop(Iop_Sub32x4, + binop(Iop_64HLtoV128, + mkU64(0x2000000020ul), + mkU64(0x2000000020ul)), + binop(Iop_64HLtoV128, + mkexpr(t2), + mkexpr(t2))))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t3), + IRExpr_ITE( + binop(Iop_CmpNE32, + mkexpr(t1), mkU32(0)), + mkexpr(t4), + binop(Iop_64HLtoV128, + mkU64(0), mkU64(0))))); + break; + } + + case 0x03: { /* SLD.D */ + DIP("SLD.D w%d, w%d[%d]", wd, ws, wt); + t1 = newTemp(Ity_I32); + t2 = newTemp(Ity_I64); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, + binop(Iop_Shl32, + binop(Iop_And32, + mkNarrowTo32(ty, + getIReg(wt)), + mkU32(1)), + mkU8(3))); + assign(t2, + binop(Iop_32HLto64, + binop(Iop_Or32, + mkexpr(t1), + binop(Iop_Shl32, + mkexpr(t1), mkU8(16))), + binop(Iop_Or32, + mkexpr(t1), + binop(Iop_Shl32, + mkexpr(t1), mkU8(16))))); + assign(t3, + binop(Iop_Shr16x8, + getWReg(ws), + binop(Iop_64HLtoV128, + mkexpr(t2), mkexpr(t2)))); + assign(t4, + binop(Iop_Shl16x8, + getWReg(wd), + binop(Iop_Sub16x8, + binop(Iop_64HLtoV128, + mkU64(0x10001000100010ul), + mkU64(0x10001000100010ul)), + 
binop(Iop_64HLtoV128, + mkexpr(t2), + mkexpr(t2))))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t3), + IRExpr_ITE( + binop(Iop_CmpNE32, + mkexpr(t1), mkU32(0)), + mkexpr(t4), + binop(Iop_64HLtoV128, + mkU64(0), mkU64(0))))); + break; + } + } + + break; + } + + case 0x01: { /* SPLAT.df */ + switch (df) { + Int i; + + case 0x00: { /* SPLAT.B */ + DIP("SPLAT.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_I32); + assign(t1, getWReg(ws)); + assign(t2, + mkNarrowTo32(ty, getIReg(wt))); + IRTemp tmp[16]; + + for (i = 0; i < 16; i++) { + tmp[i] = newTemp(Ity_I8); + assign(tmp[i], + binop(Iop_GetElem8x16, + mkexpr(t1), + unop(Iop_32to8, mkexpr(t2)))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_16HLto32, + binop(Iop_8HLto16, + mkexpr(tmp[15]), + mkexpr(tmp[14])), + binop(Iop_8HLto16, + mkexpr(tmp[13]), + mkexpr(tmp[12]))), + binop(Iop_16HLto32, + binop(Iop_8HLto16, + mkexpr(tmp[11]), + mkexpr(tmp[10])), + binop(Iop_8HLto16, + mkexpr(tmp[9]), + mkexpr(tmp[8])))), + binop(Iop_32HLto64, + binop(Iop_16HLto32, + binop(Iop_8HLto16, + mkexpr(tmp[7]), + mkexpr(tmp[6])), + binop(Iop_8HLto16, + mkexpr(tmp[5]), + mkexpr(tmp[4]))), + binop(Iop_16HLto32, + binop(Iop_8HLto16, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_8HLto16, + mkexpr(tmp[1]), + mkexpr(tmp[0])))))); + break; + } + + case 0x01: { /* SPLAT.H */ + DIP("SPLAT.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_I32); + assign(t1, getWReg(ws)); + assign(t2, + mkNarrowTo32(ty, getIReg(wt))); + IRTemp tmp[8]; + + for (i = 0; i < 8; i++) { + tmp[i] = newTemp(Ity_I16); + assign(tmp[i], + binop(Iop_GetElem16x8, + mkexpr(t1), + unop(Iop_32to8, mkexpr(t2)))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_16HLto32, + mkexpr(tmp[7]), + mkexpr(tmp[6])), + binop(Iop_16HLto32, + mkexpr(tmp[5]), + mkexpr(tmp[4]))), + binop(Iop_32HLto64, + binop(Iop_16HLto32, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_16HLto32, + mkexpr(tmp[1]), + mkexpr(tmp[0]))))); + break; + } + + case 0x02: { /* SPLAT.W */ + DIP("SPLAT.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_I32); + assign(t1, getWReg(ws)); + assign(t2, + mkNarrowTo32(ty, getIReg(wt))); + IRTemp tmp[4]; + + for (i = 0; i < 4; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + binop(Iop_GetElem32x4, + mkexpr(t1), + unop(Iop_32to8, mkexpr(t2)))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_32HLto64, + mkexpr(tmp[1]), + mkexpr(tmp[0])))); + break; + } + + case 0x03: { /* SPLAT.D */ + DIP("SPLAT.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_I32); + assign(t1, getWReg(ws)); + assign(t2, + mkNarrowTo32(ty, getIReg(wt))); + IRTemp tmp[2]; + + for (i = 0; i < 2; i++) { + tmp[i] = newTemp(Ity_I64); + assign(tmp[i], + binop(Iop_GetElem64x2, + mkexpr(t1), + unop(Iop_32to8, mkexpr(t2)))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + mkexpr(tmp[1]), mkexpr(tmp[0]))); + break; + } + } + + break; + } + + case 0x02: { /* PCKEV.df */ + switch (df) { + case 0x00: { /* PCKEV.B */ + DIP("PCKEV.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_PackEvenLanes8x16, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* PCKEV.H */ + DIP("PCKEV.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = 
newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_PackEvenLanes16x8, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* PCKEV.W */ + DIP("PCKEV.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_PackEvenLanes32x4, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* PCKEV.D */ + DIP("PCKEV.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_InterleaveLO64x2, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x03: { /* PCKOD.df */ + switch (df) { + case 0x00: { /* PCKOD.B */ + DIP("PCKOD.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_PackOddLanes8x16, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* PCKOD.H */ + DIP("PCKOD.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_PackOddLanes16x8, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* PCKOD.W */ + DIP("PCKOD.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_PackOddLanes32x4, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* PCKOD.D */ + DIP("PCKOD.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_InterleaveHI64x2, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x04: { /* ILVL.df */ + switch (df) { + case 0x00: { /* ILVL.B */ + DIP("ILVL.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_InterleaveHI8x16, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* ILVL.H */ + DIP("ILVL.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_InterleaveHI16x8, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* ILVL.W */ + DIP("ILVL.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_InterleaveHI32x4, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* ILVL.D */ + DIP("ILVL.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_InterleaveHI64x2, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + 
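/* MSA's "left" half is the most-significant half of the vector, so the ILVL.df cases map onto Iop_InterleaveHI*; the ILVR.df cases below use the Iop_InterleaveLO* variants. */ +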
break; + } + + default: + return -1; + } + + break; + } + + case 0x05: { /* ILVR.df */ + switch (df) { + case 0x00: { /* ILVR.B */ + DIP("ILVR.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_InterleaveLO8x16, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* ILVR.H */ + DIP("ILVR.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_InterleaveLO16x8, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* ILVR.W */ + DIP("ILVR.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_InterleaveLO32x4, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* ILVR.D */ + DIP("ILVR.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_InterleaveLO64x2, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x06: { /* ILVEV.df */ + switch (df) { + case 0x00: { /* ILVEV.B */ + DIP("ILVEV.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_InterleaveEvenLanes8x16, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* ILVEV.H */ + DIP("ILVEV.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_InterleaveEvenLanes16x8, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* ILVEV.W */ + DIP("ILVEV.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_InterleaveEvenLanes32x4, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* ILVEV.D */ + DIP("ILVEV.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_InterleaveLO64x2, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x07: { /* ILVOD.df */ + switch (df) { + case 0x00: { /* ILVOD.B */ + DIP("ILVOD.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_InterleaveOddLanes8x16, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* ILVOD.H */ + DIP("ILVOD.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_InterleaveOddLanes16x8, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* ILVOD.W */ + DIP("ILVOD.W w%d, w%d, w%d", wd, ws, wt); + t1 =
newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_InterleaveOddLanes32x4, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* ILVOD.D */ + DIP("ILVOD.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_InterleaveHI64x2, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + default: + return -1; + } + + return 0; +} + +static Int msa_3R_15(UInt cins, UChar wd, UChar ws) { /* 3R (0x15) */ + IRTemp t1, t2, t3, t4; + UShort operation; + UChar df, wt; + + operation = (cins & 0x03800000) >> 23; + df = (cins & 0x00600000) >> 21; + wt = (cins & 0x001F0000) >> 16; + + switch (operation) { + case 0x00: { /* VSHF.df */ + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(wd)); + assign(t2, getWReg(ws)); + assign(t3, getWReg(wt)); + + switch (df) { + case 0x00: { /* VSHF.B */ + DIP("VSHF.B w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[16]; + Int i; + + for (i = 0; i < 16; i++) { + tmp[i] = newTemp(Ity_I8); + assign(tmp[i], + IRExpr_ITE( + binop(Iop_CmpEQ8, + binop(Iop_And8, + binop(Iop_GetElem8x16, + mkexpr(t1), + mkU8(i)), + mkU8(0xC0)), + mkU8(0x0)), + IRExpr_ITE( + binop(Iop_CmpEQ8, + binop(Iop_And8, + binop(Iop_GetElem8x16, + mkexpr(t1), + mkU8(i)), + mkU8(0x10)), + mkU8(0x0)), + binop(Iop_GetElem8x16, + mkexpr(t3), + binop(Iop_GetElem8x16, + mkexpr(t1), + mkU8(i))), + binop(Iop_GetElem8x16, + mkexpr(t2), + binop(Iop_GetElem8x16, + mkexpr(t1), + mkU8(i)))), + mkU8(0x0))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_16HLto32, + binop(Iop_8HLto16, + mkexpr(tmp[15]), + mkexpr(tmp[14])), + binop(Iop_8HLto16, + mkexpr(tmp[13]), + mkexpr(tmp[12]))), + binop(Iop_16HLto32, + binop(Iop_8HLto16, + mkexpr(tmp[11]), + mkexpr(tmp[10])), + binop(Iop_8HLto16, + mkexpr(tmp[9]), + mkexpr(tmp[8])))), + binop(Iop_32HLto64, + binop(Iop_16HLto32, + binop(Iop_8HLto16, + mkexpr(tmp[7]), + mkexpr(tmp[6])), + binop(Iop_8HLto16, + mkexpr(tmp[5]), + mkexpr(tmp[4]))), + binop(Iop_16HLto32, + binop(Iop_8HLto16, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_8HLto16, + mkexpr(tmp[1]), + mkexpr(tmp[0])))))); + break; + } + + case 0x01: { /* VSHF.H */ + DIP("VSHF.H w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[8]; + Int i; + + for (i = 0; i < 8; i++) { + tmp[i] = newTemp(Ity_I16); + assign(tmp[i], + IRExpr_ITE( + binop(Iop_CmpEQ16, + binop(Iop_And16, + binop(Iop_GetElem16x8, + mkexpr(t1), + mkU8(i)), + mkU16(0xC0)), + mkU16(0x0)), + IRExpr_ITE( + binop(Iop_CmpEQ16, + binop(Iop_And16, + binop(Iop_GetElem16x8, + mkexpr(t1), + mkU8(i)), + mkU16(0x08)), + mkU16(0x0)), + binop(Iop_GetElem16x8, + mkexpr(t3), + unop(Iop_16to8, + binop(Iop_GetElem16x8, + mkexpr(t1), + mkU8(i)))), + binop(Iop_GetElem16x8, + mkexpr(t2), + unop(Iop_16to8, + binop(Iop_GetElem16x8, + mkexpr(t1), + mkU8(i))))), + mkU16(0x0))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_16HLto32, + mkexpr(tmp[7]), + mkexpr(tmp[6])), + binop(Iop_16HLto32, + mkexpr(tmp[5]), + mkexpr(tmp[4]))), + binop(Iop_32HLto64, + binop(Iop_16HLto32, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_16HLto32, + mkexpr(tmp[1]), + mkexpr(tmp[0]))))); + break; + } + + case 0x02: { /* VSHF.W */ + DIP("VSHF.W w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[4]; + Int i; 
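+ /* VSHF.W: each control word in wd (t1) picks one result element: if bits 6-7 of the control are set the lane is zeroed, otherwise bit 2 (0x04) selects ws (t2) over wt (t3) and the low bits index the element, mirroring VSHF.B above. */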
+ + for (i = 0; i < 4; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + IRExpr_ITE( + binop(Iop_CmpEQ32, + binop(Iop_And32, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(i)), + mkU32(0xC0)), + mkU32(0x0)), + IRExpr_ITE( + binop(Iop_CmpEQ32, + binop(Iop_And32, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(i)), + mkU32(0x04)), + mkU32(0x0)), + binop(Iop_GetElem32x4, + mkexpr(t3), + unop(Iop_32to8, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(i)))), + binop(Iop_GetElem32x4, + mkexpr(t2), + unop(Iop_32to8, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(i))))), + mkU32(0x0))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_32HLto64, + mkexpr(tmp[1]), + mkexpr(tmp[0])))); + break; + } + + case 0x03: { /* VSHF.D */ + DIP("VSHF.D w%d, w%d, w%d", wd, ws, wt); + IRTemp tmp[2]; + Int i; + + for (i = 0; i < 2; i++) { + tmp[i] = newTemp(Ity_I64); + assign(tmp[i], + IRExpr_ITE( + binop(Iop_CmpEQ64, + binop(Iop_And64, + binop(Iop_GetElem64x2, + mkexpr(t1), + mkU8(i)), + mkU64(0xC0)), + mkU64(0x0)), + IRExpr_ITE( + binop(Iop_CmpEQ64, + binop(Iop_And64, + binop(Iop_GetElem64x2, + mkexpr(t1), + mkU8(i)), + mkU64(0x02)), + mkU64(0x0)), + binop(Iop_GetElem64x2, + mkexpr(t3), + unop(Iop_64to8, + binop(Iop_GetElem64x2, + mkexpr(t1), + mkU8(i)))), + binop(Iop_GetElem64x2, + mkexpr(t2), + unop(Iop_64to8, + binop(Iop_GetElem64x2, + mkexpr(t1), + mkU8(i))))), + mkU64(0x0))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + mkexpr(tmp[1]), mkexpr(tmp[0]))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x01: { /* SRAR.df */ + switch (df) { + case 0x00: { /* SRAR.B */ + DIP("SRAR.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, + binop(Iop_Sar8x16, + getWReg(ws), + getWReg(wt))); + assign(t2, + binop(Iop_Sub8x16, + binop(Iop_64HLtoV128, + mkU64(0x808080808080808ull), + mkU64(0x808080808080808ull)), + getWReg(wt))); + assign(t4, + unop(Iop_NotV128, + binop(Iop_CmpEQ8x16, + binop(Iop_ShlN8x16, + getWReg(wt), + mkU8(5)), + binop(Iop_64HLtoV128, + mkU64(0), mkU64(0))))); + assign(t3, + binop(Iop_ShrN8x16, + binop(Iop_AndV128, + binop(Iop_Shl8x16, + getWReg(ws), + mkexpr(t2)), + mkexpr(t4)), + mkU8(7))); + putWReg(wd, + binop(Iop_Add8x16, + mkexpr(t1), mkexpr(t3))); + break; + } + + case 0x01: { /* SRAR.H */ + DIP("SRAR.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, + binop(Iop_Sar16x8, + getWReg(ws), + getWReg(wt))); + assign(t2, + binop(Iop_Sub16x8, + binop(Iop_64HLtoV128, + mkU64(0x10001000100010ul), + mkU64(0x10001000100010ul)), + getWReg(wt))); + assign(t4, + unop(Iop_NotV128, + binop(Iop_CmpEQ16x8, + binop(Iop_ShlN16x8, + getWReg(wt), + mkU8(12)), + binop(Iop_64HLtoV128, + mkU64(0), mkU64(0))))); + assign(t3, + binop(Iop_ShrN16x8, + binop(Iop_AndV128, + binop(Iop_Shl16x8, + getWReg(ws), + mkexpr(t2)), + mkexpr(t4)), + mkU8(15))); + putWReg(wd, + binop(Iop_Add16x8, + mkexpr(t1), mkexpr(t3))); + break; + } + + case 0x02: { /* SRAR.W */ + DIP("SRAR.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); // shifted + t2 = newTemp(Ity_V128); // 32 - wt + t3 = newTemp(Ity_V128); // rv + t4 = newTemp(Ity_V128); // wt % 32 == 0 + assign(t1, + binop(Iop_Sar32x4, + getWReg(ws), + getWReg(wt))); + assign(t2, + binop(Iop_Sub32x4, + binop(Iop_64HLtoV128, + mkU64(0x2000000020ul), + mkU64(0x2000000020ul)), + getWReg(wt))); + assign(t4, + 
unop(Iop_NotV128, + binop(Iop_CmpEQ32x4, + binop(Iop_ShlN32x4, + getWReg(wt), + mkU8(27)), + binop(Iop_64HLtoV128, + mkU64(0), mkU64(0))))); + assign(t3, + binop(Iop_ShrN32x4, + binop(Iop_AndV128, + binop(Iop_Shl32x4, + getWReg(ws), + mkexpr(t2)), + mkexpr(t4)), + mkU8(31))); + putWReg(wd, + binop(Iop_Add32x4, + mkexpr(t1), mkexpr(t3))); + break; + } + + case 0x03: { /* SRAR.D */ + DIP("SRAR.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, + binop(Iop_Sar64x2, + getWReg(ws), + getWReg(wt))); + assign(t2, + binop(Iop_Sub64x2, + binop(Iop_64HLtoV128, + mkU64(64ul), mkU64(64ul)), + getWReg(wt))); + assign(t4, + unop(Iop_NotV128, + binop(Iop_CmpEQ64x2, + binop(Iop_ShlN64x2, + getWReg(wt), + mkU8(58)), + binop(Iop_64HLtoV128, + mkU64(0), mkU64(0))))); + assign(t3, + binop(Iop_ShrN64x2, + binop(Iop_AndV128, + binop(Iop_Shl64x2, + getWReg(ws), + mkexpr(t2)), + mkexpr(t4)), + mkU8(63))); + putWReg(wd, + binop(Iop_Add64x2, + mkexpr(t1), mkexpr(t3))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x02: { /* SRLR.df */ + switch (df) { + case 0x00: { /* SRLR.B */ + DIP("SRLR.B w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, + binop(Iop_Shr8x16, + getWReg(ws), + getWReg(wt))); + assign(t2, + binop(Iop_Sub8x16, + binop(Iop_64HLtoV128, + mkU64(0x808080808080808ull), + mkU64(0x808080808080808ull)), + getWReg(wt))); + assign(t4, + unop(Iop_NotV128, + binop(Iop_CmpEQ8x16, + binop(Iop_ShlN8x16, + getWReg(wt), + mkU8(5)), + binop(Iop_64HLtoV128, + mkU64(0), mkU64(0))))); + assign(t3, + binop(Iop_ShrN8x16, + binop(Iop_AndV128, + binop(Iop_Shl8x16, + getWReg(ws), + mkexpr(t2)), + mkexpr(t4)), + mkU8(7))); + putWReg(wd, + binop(Iop_Add8x16, + mkexpr(t1), mkexpr(t3))); + break; + } + + case 0x01: { /* SRLR.H */ + DIP("SRLR.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, + binop(Iop_Shr16x8, + getWReg(ws), + getWReg(wt))); + assign(t2, + binop(Iop_Sub16x8, + binop(Iop_64HLtoV128, + mkU64(0x10001000100010ul), + mkU64(0x10001000100010ul)), + getWReg(wt))); + assign(t4, + unop(Iop_NotV128, + binop(Iop_CmpEQ16x8, + binop(Iop_ShlN16x8, + getWReg(wt), + mkU8(12)), + binop(Iop_64HLtoV128, + mkU64(0), mkU64(0))))); + assign(t3, + binop(Iop_ShrN16x8, + binop(Iop_AndV128, + binop(Iop_Shl16x8, + getWReg(ws), + mkexpr(t2)), + mkexpr(t4)), + mkU8(15))); + putWReg(wd, + binop(Iop_Add16x8, + mkexpr(t1), mkexpr(t3))); + break; + } + + case 0x02: { /* SRLR.W */ + DIP("SRLR.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, + binop(Iop_Shr32x4, + getWReg(ws), + getWReg(wt))); + assign(t2, + binop(Iop_Sub32x4, + binop(Iop_64HLtoV128, + mkU64(0x2000000020ul), + mkU64(0x2000000020ul)), + getWReg(wt))); + assign(t4, + unop(Iop_NotV128, + binop(Iop_CmpEQ32x4, + binop(Iop_ShlN32x4, + getWReg(wt), + mkU8(27)), + binop(Iop_64HLtoV128, + mkU64(0), mkU64(0))))); + assign(t3, + binop(Iop_ShrN32x4, + binop(Iop_AndV128, + binop(Iop_Shl32x4, + getWReg(ws), + mkexpr(t2)), + mkexpr(t4)), + mkU8(31))); + putWReg(wd, + binop(Iop_Add32x4, + mkexpr(t1), mkexpr(t3))); + break; + } + + case 0x03: { /* SRLR.D */ + DIP("SRLR.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = 
newTemp(Ity_V128); + assign(t1, + binop(Iop_Shr64x2, + getWReg(ws), + getWReg(wt))); + assign(t2, + binop(Iop_Sub64x2, + binop(Iop_64HLtoV128, + mkU64(64ul), mkU64(64ul)), + getWReg(wt))); + assign(t4, + unop(Iop_NotV128, + binop(Iop_CmpEQ64x2, + binop(Iop_ShlN64x2, + getWReg(wt), + mkU8(58)), + binop(Iop_64HLtoV128, + mkU64(0), mkU64(0))))); + assign(t3, + binop(Iop_ShrN64x2, + binop(Iop_AndV128, + binop(Iop_Shl64x2, + getWReg(ws), + mkexpr(t2)), + mkexpr(t4)), + mkU8(63))); + putWReg(wd, + binop(Iop_Add64x2, + mkexpr(t1), mkexpr(t3))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x04: { /* HADD_S.df */ + switch (df) { + case 0x01: { /* HADD_S.H */ + DIP("HADD_S.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_Add16x8, + binop(Iop_SarN16x8, + mkexpr(t1), mkU8(8)), + binop(Iop_SarN16x8, + binop(Iop_ShlN16x8, + mkexpr(t2), mkU8(8)), + mkU8(8)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* HADD_S.W */ + DIP("HADD_S.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_Add32x4, + binop(Iop_SarN32x4, + mkexpr(t1), mkU8(16)), + binop(Iop_SarN32x4, + binop(Iop_ShlN32x4, + mkexpr(t2), mkU8(16)), + mkU8(16)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* HADD_S.D */ + DIP("HADD_S.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_Add64x2, + binop(Iop_SarN64x2, + mkexpr(t1), mkU8(32)), + binop(Iop_SarN64x2, + binop(Iop_ShlN64x2, + mkexpr(t2), mkU8(32)), + mkU8(32)))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x05: { /* HADD_U.df */ + switch (df) { + case 0x01: { /* HADD_U.H */ + DIP("HADD_U.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_Add16x8, + binop(Iop_ShrN16x8, + mkexpr(t1), mkU8(8)), + binop(Iop_ShrN16x8, + binop(Iop_ShlN16x8, + mkexpr(t2), mkU8(8)), + mkU8(8)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* HADD_U.W */ + DIP("HADD_U.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_Add32x4, + binop(Iop_ShrN32x4, + mkexpr(t1), mkU8(16)), + binop(Iop_ShrN32x4, + binop(Iop_ShlN32x4, + mkexpr(t2), mkU8(16)), + mkU8(16)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* HADD_U.D */ + DIP("HADD_U.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_Add64x2, + binop(Iop_ShrN64x2, + mkexpr(t1), mkU8(32)), + binop(Iop_ShrN64x2, + binop(Iop_ShlN64x2, + mkexpr(t2), mkU8(32)), + mkU8(32)))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x06: { /* HSUB_S.df */ + switch (df) { + case 0x01: { /* HSUB_S.H */ + DIP("HSUB_S.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_Sub16x8, 
+ binop(Iop_SarN16x8, + mkexpr(t1), mkU8(8)), + binop(Iop_SarN16x8, + binop(Iop_ShlN16x8, + mkexpr(t2), mkU8(8)), + mkU8(8)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* HSUB_S.W */ + DIP("HSUB_S.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_Sub32x4, + binop(Iop_SarN32x4, + mkexpr(t1), mkU8(16)), + binop(Iop_SarN32x4, + binop(Iop_ShlN32x4, + mkexpr(t2), mkU8(16)), + mkU8(16)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* HSUB_S.D */ + DIP("HSUB_S.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_Sub64x2, + binop(Iop_SarN64x2, + mkexpr(t1), mkU8(32)), + binop(Iop_SarN64x2, + binop(Iop_ShlN64x2, + mkexpr(t2), mkU8(32)), + mkU8(32)))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x07: { /* HSUB_U.df */ + switch (df) { + case 0x01: { /* HSUB_U.H */ + DIP("HSUB_U.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_Sub16x8, + binop(Iop_ShrN16x8, + mkexpr(t1), mkU8(8)), + binop(Iop_ShrN16x8, + binop(Iop_ShlN16x8, + mkexpr(t2), mkU8(8)), + mkU8(8)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* HSUB_U.W */ + DIP("HSUB_U.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_Sub32x4, + binop(Iop_ShrN32x4, + mkexpr(t1), mkU8(16)), + binop(Iop_ShrN32x4, + binop(Iop_ShlN32x4, + mkexpr(t2), mkU8(16)), + mkU8(16)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* HSUB_U.D */ + DIP("HSUB_U.D w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_Sub64x2, + binop(Iop_ShrN64x2, + mkexpr(t1), mkU8(32)), + binop(Iop_ShrN64x2, + binop(Iop_ShlN64x2, + mkexpr(t2), mkU8(32)), + mkU8(32)))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + default: + return -1; + } + + return 0; +} + +static Int msa_3R_1A(UInt cins, UChar wd, UChar ws) { /* 3R (0x1A) */ + UShort operation; + UChar df, wt; + + operation = (cins & 0x03C00000) >> 22; + df = (cins & 0x00200000) >> 21; + wt = (cins & 0x001F0000) >> 16; + + switch (operation) { + case 0x00: { /* FCAF.df */ + switch (df) { + case 0x00: { /* FCAF.W */ + DIP("FCAF.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCAFW, 2); + putWReg(wd, binop(Iop_64HLtoV128, mkU64(0ul), mkU64(0ul))); + break; + } + + case 0x01: { /* FCAF.D */ + DIP("FCAF.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCAFD, 2); + putWReg(wd, binop(Iop_64HLtoV128, mkU64(0ul), mkU64(0ul))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x01: { /* FCUN.df */ + switch (df) { + case 0x00: { /* FCUN.W */ + DIP("FCUN.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCUNW, 2); + putWReg(wd, binop(Iop_CmpUN32Fx4, + getWReg(ws), + getWReg(wt))); + break; + } + + case 0x01: { /* FCUN.D */ + DIP("FCUN.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCUND, 2); + putWReg(wd, binop(Iop_CmpUN64Fx2, + getWReg(ws), + getWReg(wt))); + break; + } + + 
default: + return -1; + } + + break; + } + + case 0x02: { /* FCEQ.df */ + switch (df) { + case 0x00: { /* FCEQ.W */ + DIP("FCEQ.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCEQW, 2); + putWReg(wd, binop(Iop_CmpEQ32Fx4, + getWReg(ws), + getWReg(wt))); + break; + } + + case 0x01: { /* FCEQ.D */ + DIP("FCEQ.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCEQD, 2); + putWReg(wd, binop(Iop_CmpEQ64Fx2, + getWReg(ws), + getWReg(wt))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x03: { /* FCUEQ.df */ + switch (df) { + case 0x00: { /* FCUEQ.W */ + DIP("FCUEQ.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCUEQW, 2); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_CmpEQ32Fx4, + getWReg(ws), + getWReg(wt)), + binop(Iop_CmpUN32Fx4, + getWReg(ws), + getWReg(wt)))); + break; + } + + case 0x01: { /* FCUEQ.D */ + DIP("FCUEQ.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCUEQD, 2); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_CmpEQ64Fx2, + getWReg(ws), + getWReg(wt)), + binop(Iop_CmpUN64Fx2, + getWReg(ws), + getWReg(wt)))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x04: { /* FCLT.df */ + switch (df) { + case 0x00: { /* FCLT.W */ + DIP("FCLT.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCLTW, 2); + putWReg(wd, + binop(Iop_CmpLT32Fx4, + getWReg(ws), + getWReg(wt))); + break; + } + + case 0x01: { /* FCLT.D */ + DIP("FCLT.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCLTD, 2); + putWReg(wd, + binop(Iop_CmpLT64Fx2, + getWReg(ws), + getWReg(wt))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x05: { /* FCULT.df */ + switch (df) { + case 0x00: { /* FCULT.W */ + DIP("FCULT.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCULTW, 2); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_CmpLT32Fx4, + getWReg(ws), + getWReg(wt)), + binop(Iop_CmpUN32Fx4, + getWReg(ws), + getWReg(wt)))); + break; + } + + case 0x01: { /* FCULT.D */ + DIP("FCULT.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCULTD, 2); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_CmpLT64Fx2, + getWReg(ws), + getWReg(wt)), + binop(Iop_CmpUN64Fx2, + getWReg(ws), + getWReg(wt)))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x06: { /* FCLE.df */ + switch (df) { + case 0x00: { /* FCLE.W */ + DIP("FCLE.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCLEW, 2); + putWReg(wd, + binop(Iop_CmpLE32Fx4, + getWReg(ws), + getWReg(wt))); + break; + } + + case 0x01: { /* FCLE.D */ + DIP("FCLE.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCLED, 2); + putWReg(wd, + binop(Iop_CmpLE64Fx2, + getWReg(ws), + getWReg(wt))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x07: { /* FCULE.df */ + switch (df) { + case 0x00: { /* FCULE.W */ + DIP("FCULE.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCULEW, 2); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_CmpLE32Fx4, + getWReg(ws), + getWReg(wt)), + binop(Iop_CmpUN32Fx4, + getWReg(ws), + getWReg(wt)))); + break; + } + + case 0x01: { /* FCULE.D */ + DIP("FCULE.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCULED, 2); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_CmpLE64Fx2, + getWReg(ws), + getWReg(wt)), + binop(Iop_CmpUN64Fx2, + getWReg(ws), + getWReg(wt)))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x08: { /* FSAF.df */ + switch (df) { + case 0x00: { /* FSAF.W */ + DIP("FSAF.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSAFW, 2); + putWReg(wd, + 
binop(Iop_64HLtoV128, + mkU64(0ul), mkU64(0ul))); + break; + } + + case 0x01: { /* FSAF.D */ + DIP("FSAF.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSAFD, 2); + putWReg(wd, + binop(Iop_64HLtoV128, + mkU64(0ul), mkU64(0ul))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x09: { /* FSUN.df */ + switch (df) { + case 0x00: { /* FSUN.W */ + DIP("FSUN.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSUNW, 2); + putWReg(wd, + binop(Iop_CmpUN32Fx4, + getWReg(ws), + getWReg(wt))); + break; + } + + case 0x01: { /* FSUN.D */ + DIP("FSUN.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSUND, 2); + putWReg(wd, + binop(Iop_CmpUN64Fx2, + getWReg(ws), + getWReg(wt))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x0A: { /* FSEQ.df */ + switch (df) { + case 0x00: { /* FSEQ.W */ + DIP("FSEQ.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSEQW, 2); + putWReg(wd, + binop(Iop_CmpEQ32Fx4, + getWReg(ws), + getWReg(wt))); + break; + } + + case 0x01: { /* FSEQ.D */ + DIP("FSEQ.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSEQD, 2); + putWReg(wd, + binop(Iop_CmpEQ64Fx2, + getWReg(ws), + getWReg(wt))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x0B: { /* FSUEQ.df */ + switch (df) { + case 0x00: { /* FSUEQ.W */ + DIP("FSUEQ.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSUEQW, 2); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_CmpEQ32Fx4, + getWReg(ws), + getWReg(wt)), + binop(Iop_CmpUN32Fx4, + getWReg(ws), + getWReg(wt)))); + break; + } + + case 0x01: { /* FSUEQ.D */ + DIP("FSUEQ.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSUEQD, 2); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_CmpEQ64Fx2, + getWReg(ws), + getWReg(wt)), + binop(Iop_CmpUN64Fx2, + getWReg(ws), + getWReg(wt)))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x0C: { /* FSLT.df */ + switch (df) { + case 0x00: { /* FSLT.W */ + DIP("FSLT.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSLTW, 2); + putWReg(wd, + binop(Iop_CmpLT32Fx4, + getWReg(ws), + getWReg(wt))); + break; + } + + case 0x01: { /* FSLT.D */ + DIP("FSLT.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSLTD, 2); + putWReg(wd, + binop(Iop_CmpLT64Fx2, + getWReg(ws), + getWReg(wt))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x0D: { /* FSULT.df */ + switch (df) { + case 0x00: { /* FSULT.W */ + DIP("FSULT.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSULTW, 2); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_CmpLT32Fx4, + getWReg(ws), + getWReg(wt)), + binop(Iop_CmpUN32Fx4, + getWReg(ws), + getWReg(wt)))); + break; + } + + case 0x01: { /* FSULT.D */ + DIP("FSULT.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSULTD, 2); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_CmpLT64Fx2, + getWReg(ws), + getWReg(wt)), + binop(Iop_CmpUN64Fx2, + getWReg(ws), + getWReg(wt)))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x0E: { /* FSLE.df */ + switch (df) { + case 0x00: { /* FSLE.W */ + DIP("FSLE.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSLEW, 2); + putWReg(wd, + binop(Iop_CmpLE32Fx4, + getWReg(ws), + getWReg(wt))); + break; + } + + case 0x01: { /* FSLE.D */ + DIP("FSLE.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSLED, 2); + putWReg(wd, + binop(Iop_CmpLE64Fx2, + getWReg(ws), + getWReg(wt))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x0F: { /* FSULE.df */ + switch (df) { + case 0x00: { /* FSULE.W */ + 
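+ /* Like the other FS*.df compares in this function, FSULE computes the same lane mask as its FC* counterpart; the difference lives in calculateMSACSR, where the signaling variants also raise Invalid Operation for quiet NaN operands. */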
DIP("FSULE.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSULEW, 2); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_CmpLE32Fx4, + getWReg(ws), + getWReg(wt)), + binop(Iop_CmpUN32Fx4, + getWReg(ws), + getWReg(wt)))); + break; + } + + case 0x01: { /* FSULE.D */ + DIP("FSULE.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSULED, 2); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_CmpLE64Fx2, + getWReg(ws), + getWReg(wt)), + binop(Iop_CmpUN64Fx2, + getWReg(ws), + getWReg(wt)))); + break; + } + + default: + return -1; + } + + break; + } + + default: + return -1; + } + + return 0; +} + +static Int msa_3R_1B(UInt cins, UChar wd, UChar ws) { /* 3R (0x1B) */ + IRTemp t1, t2, t3, t4; + UShort operation; + UChar df, wt; + + operation = (cins & 0x03C00000) >> 22; + df = (cins & 0x00200000) >> 21; + wt = (cins & 0x001F0000) >> 16; + + switch (operation) { + case 0x00: { /* FADD.df */ + switch (df) { + case 0x00: { /* FADD.W */ + DIP("FADD.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FADDW, 2); + IRExpr *rm = get_IR_roundingmode_MSA(); + putWReg(wd, + triop(Iop_Add32Fx4, rm, + getWReg(ws), + getWReg(wt))); + break; + } + + case 0x01: { /* FADD.D */ + DIP("FADD.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FADDD, 2); + IRExpr *rm = get_IR_roundingmode_MSA(); + putWReg(wd, + triop(Iop_Add64Fx2, rm, + getWReg(ws), + getWReg(wt))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x01: { /* FSUB.df */ + switch (df) { + case 0x00: { /* FSUB.W */ + DIP("FSUB.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSUBW, 2); + IRExpr *rm = get_IR_roundingmode_MSA(); + putWReg(wd, + triop(Iop_Sub32Fx4, rm, + getWReg(ws), + getWReg(wt))); + break; + } + + case 0x01: { /* FSUB.D */ + DIP("FSUB.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSUBD, 2); + IRExpr *rm = get_IR_roundingmode_MSA(); + putWReg(wd, + triop(Iop_Sub64Fx2, rm, + getWReg(ws), + getWReg(wt))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x02: { /* FMUL.df */ + switch (df) { + case 0x00: { /* FMUL.W */ + DIP("FMUL.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FMULW, 2); + IRExpr *rm = get_IR_roundingmode_MSA(); + putWReg(wd, + triop(Iop_Mul32Fx4, rm, + getWReg(ws), + getWReg(wt))); + break; + } + + case 0x01: { /* FMUL.D */ + DIP("FMUL.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FMULW, 2); + IRExpr *rm = get_IR_roundingmode_MSA(); + putWReg(wd, + triop(Iop_Mul64Fx2, rm, + getWReg(ws), + getWReg(wt))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x03: { /* FDIV.df */ + switch (df) { + case 0x00: { /* FDIV.W */ + DIP("FDIV.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FDIVW, 2); + IRExpr *rm = get_IR_roundingmode_MSA(); + putWReg(wd, + triop(Iop_Div32Fx4, rm, + getWReg(ws), + getWReg(wt))); + break; + } + + case 0x01: { /* FDIV.D */ + DIP("FDIV.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FDIVD, 2); + IRExpr *rm = get_IR_roundingmode_MSA(); + putWReg(wd, + triop(Iop_Div64Fx2, rm, + getWReg(ws), + getWReg(wt))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x04: { /* FMADD.df */ + switch (df) { + case 0x00: { /* FMADD.W */ + DIP("FMADD.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FMADDW, 2); + IRExpr *rm = get_IR_roundingmode_MSA(); + IRTemp tmp[4]; + Int i; + + for (i = 0; i < 4; i++) { + tmp[i] = newTemp(Ity_F32); + assign(tmp[i], + qop(Iop_MAddF32, rm, + unop(Iop_ReinterpI32asF32, + binop(Iop_GetElem32x4, + getWReg(ws), + mkU8(i))), + 
unop(Iop_ReinterpI32asF32, + binop(Iop_GetElem32x4, + getWReg(wt), + mkU8(i))), + unop(Iop_ReinterpI32asF32, + binop(Iop_GetElem32x4, + getWReg(wd), + mkU8(i))))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + unop(Iop_ReinterpF32asI32, + mkexpr(tmp[3])), + unop(Iop_ReinterpF32asI32, + mkexpr(tmp[2]))), + binop(Iop_32HLto64, + unop(Iop_ReinterpF32asI32, + mkexpr(tmp[1])), + unop(Iop_ReinterpF32asI32, + mkexpr(tmp[0]))))); + break; + } + + case 0x01: { /* FMADD.D */ + DIP("FMADD.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FMADDD, 2); + IRExpr *rm = get_IR_roundingmode_MSA(); + IRTemp tmp[2]; + Int i; + + for (i = 0; i < 2; i++) { + tmp[i] = newTemp(Ity_F64); + assign(tmp[i], + qop(Iop_MAddF64, rm, + unop(Iop_ReinterpI64asF64, + binop(Iop_GetElem64x2, + getWReg(ws), + mkU8(i))), + unop(Iop_ReinterpI64asF64, + binop(Iop_GetElem64x2, + getWReg(wt), + mkU8(i))), + unop(Iop_ReinterpI64asF64, + binop(Iop_GetElem64x2, + getWReg(wd), + mkU8(i))))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + unop(Iop_ReinterpF64asI64, + mkexpr(tmp[1])), + unop(Iop_ReinterpF64asI64, + mkexpr(tmp[0])))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x05: { /* FMSUB.df */ + switch (df) { + case 0x00: { /* FMSUB.W */ + DIP("FMSUB.W w%d, w%d, w%d", wd, ws, wt); + /* msa_flt_op has no FMSUB entries; the FMADD ones are reused for the MSACSR calculation. */ + calculateMSACSR(ws, wt, FMADDW, 2); + IRExpr *rm = get_IR_roundingmode_MSA(); + IRTemp tmp[4]; + Int i; + + for (i = 0; i < 4; i++) { + tmp[i] = newTemp(Ity_F32); + assign(tmp[i], + qop(Iop_MSubF32, rm, + unop(Iop_ReinterpI32asF32, + binop(Iop_GetElem32x4, + getWReg(ws), + mkU8(i))), + unop(Iop_ReinterpI32asF32, + binop(Iop_GetElem32x4, + getWReg(wt), + mkU8(i))), + unop(Iop_ReinterpI32asF32, + binop(Iop_GetElem32x4, + getWReg(wd), + mkU8(i))))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + unop(Iop_ReinterpF32asI32, + mkexpr(tmp[3])), + unop(Iop_ReinterpF32asI32, + mkexpr(tmp[2]))), + binop(Iop_32HLto64, + unop(Iop_ReinterpF32asI32, + mkexpr(tmp[1])), + unop(Iop_ReinterpF32asI32, + mkexpr(tmp[0]))))); + break; + } + + case 0x01: { /* FMSUB.D */ + DIP("FMSUB.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FMADDD, 2); + IRExpr *rm = get_IR_roundingmode_MSA(); + IRTemp tmp[2]; + Int i; + + for (i = 0; i < 2; i++) { + tmp[i] = newTemp(Ity_F64); + assign(tmp[i], + qop(Iop_MSubF64, rm, + unop(Iop_ReinterpI64asF64, + binop(Iop_GetElem64x2, + getWReg(ws), + mkU8(i))), + unop(Iop_ReinterpI64asF64, + binop(Iop_GetElem64x2, + getWReg(wt), + mkU8(i))), + unop(Iop_ReinterpI64asF64, + binop(Iop_GetElem64x2, + getWReg(wd), + mkU8(i))))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + unop(Iop_ReinterpF64asI64, + mkexpr(tmp[1])), + unop(Iop_ReinterpF64asI64, + mkexpr(tmp[0])))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x07: { /* FEXP2.df */ + switch (df) { + case 0x00: { /* FEXP2.W */ + DIP("FEXP2.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FEXP2W, 2); + IRExpr *rm = get_IR_roundingmode_MSA(); + putWReg(wd, + triop(Iop_Scale2_32Fx4, rm, + getWReg(ws), + getWReg(wt))); + break; + } + + case 0x01: { /* FEXP2.D */ + DIP("FEXP2.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FEXP2D, 2); + IRExpr *rm = get_IR_roundingmode_MSA(); + putWReg(wd, + triop(Iop_Scale2_64Fx2, rm, + getWReg(ws), + getWReg(wt))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x08: { /* FEXDO.df */ + switch (df) { + case 0x00: { /* FEXDO.H */ + DIP("FEXDO.H w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FEXDOH, 2); + t1 = newTemp(Ity_I64); +
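+ /* FEXDO.H: each F32 lane is narrowed to F16; the four halfwords from ws become the upper 64 bits of wd and those from wt the lower 64 bits. */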
t2 = newTemp(Ity_I64); + assign(t1, + unop(Iop_F32toF16x4, + getWReg(ws))); + assign(t2, + unop(Iop_F32toF16x4, + getWReg(wt))); + putWReg(wd, + binop(Iop_64HLtoV128, + mkexpr(t1), mkexpr(t2))); + break; + } + + case 0x01: { /* FEXDO.W */ + DIP("FEXDO.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FEXDOW, 2); + t1 = newTemp(Ity_I32); + t2 = newTemp(Ity_I32); + t3 = newTemp(Ity_I32); + t4 = newTemp(Ity_I32); + IRExpr *rm = get_IR_roundingmode_MSA(); + assign(t1, + unop(Iop_ReinterpF32asI32, + binop(Iop_F64toF32, rm, + unop(Iop_ReinterpI64asF64, + unop(Iop_V128to64, + getWReg(ws)))))); + assign(t2, + unop(Iop_ReinterpF32asI32, + binop(Iop_F64toF32, rm, + unop(Iop_ReinterpI64asF64, + unop(Iop_V128HIto64, + getWReg(ws)))))); + assign(t3, + unop(Iop_ReinterpF32asI32, + binop(Iop_F64toF32, rm, + unop(Iop_ReinterpI64asF64, + unop(Iop_V128to64, + getWReg(wt)))))); + assign(t4, + unop(Iop_ReinterpF32asI32, + binop(Iop_F64toF32, rm, + unop(Iop_ReinterpI64asF64, + unop(Iop_V128HIto64, + getWReg(wt)))))); + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + mkexpr(t2), mkexpr(t1)), + binop(Iop_32HLto64, + mkexpr(t4), mkexpr(t3)))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x0A: { /* FTQ.df */ + switch (df) { + case 0x00: { /* FTQ.H */ + DIP("FTQ.H w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FTQH, 2); + IRExpr *rm = get_IR_roundingmode_MSA(); + putWReg(wd, + triop(Iop_F32x4_2toQ16x8, rm, + getWReg(ws), + getWReg(wt))); + break; + } + + case 0x01: { /* FTQ.W */ + DIP("FTQ.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FTQW, 2); + IRExpr *rm = get_IR_roundingmode_MSA(); + putWReg(wd, + triop(Iop_F64x2_2toQ32x4, rm, + getWReg(ws), + getWReg(wt))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x0C: { /* FMIN.df */ + switch (df) { + case 0x00: { /* FMIN.W */ + DIP("FMIN.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FMINW, 2); + putWReg(wd, + binop(Iop_Min32Fx4, + getWReg(ws), + getWReg(wt))); + break; + } + + case 0x01: { /* FMIN.D */ + DIP("FMIN.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FMIND, 2); + putWReg(wd, + binop(Iop_Min64Fx2, + getWReg(ws), + getWReg(wt))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x0D: { /* FMIN_A.df */ + switch (df) { + case 0x00: { /* FMIN_A.W */ + DIP("FMIN_A.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FMINAW, 2); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, + binop(Iop_AndV128, + getWReg(ws), + binop(Iop_64HLtoV128, + mkU64(0x7FFFFFFF7FFFFFFF), + mkU64(0x7FFFFFFF7FFFFFFF)))); + assign(t2, + binop(Iop_AndV128, + getWReg(wt), + binop(Iop_64HLtoV128, + mkU64(0x7FFFFFFF7FFFFFFF), + mkU64(0x7FFFFFFF7FFFFFFF)))); + assign(t3, + binop(Iop_Min32Fx4, + mkexpr(t2), mkexpr(t1))); + assign(t4, + binop(Iop_AndV128, + binop(Iop_AndV128, + unop(Iop_NotV128, + binop(Iop_CmpUN32Fx4, + mkexpr(t3), + mkexpr(t3))), + binop(Iop_OrV128, + binop(Iop_AndV128, + binop(Iop_CmpEQ32Fx4, + mkexpr(t1), + mkexpr(t2)), + binop(Iop_OrV128, + getWReg(ws), + getWReg(wt))), + binop(Iop_OrV128, + binop(Iop_AndV128, + binop(Iop_OrV128, + binop(Iop_CmpUN32Fx4, + mkexpr(t1), + mkexpr(t1)), + binop(Iop_CmpLT32Fx4, + mkexpr(t3), + mkexpr(t1))), + getWReg(wt)), + binop(Iop_AndV128, + binop(Iop_OrV128, + binop(Iop_CmpUN32Fx4, + mkexpr(t2), + mkexpr(t2)), + binop(Iop_CmpLT32Fx4, + mkexpr(t3), + mkexpr(t2))), + getWReg(ws))))), + binop(Iop_64HLtoV128, + mkU64(0x8000000080000000), +
mkU64(0x8000000080000000)))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t3), mkexpr(t4))); + break; + } + + case 0x01: { /* FMIN_A.D */ + DIP("FMIN_A.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FMINAD, 2); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, + binop(Iop_AndV128, + getWReg(ws), + binop(Iop_64HLtoV128, + mkU64(0x7FFFFFFFFFFFFFFF), + mkU64(0x7FFFFFFFFFFFFFFF)))); + assign(t2, + binop(Iop_AndV128, + getWReg(wt), + binop(Iop_64HLtoV128, + mkU64(0x7FFFFFFFFFFFFFFF), + mkU64(0x7FFFFFFFFFFFFFFF)))); + assign(t3, + binop(Iop_Min64Fx2, + mkexpr(t2), mkexpr(t1))); + assign(t4, + binop(Iop_AndV128, + binop(Iop_AndV128, + unop(Iop_NotV128, + binop(Iop_CmpUN64Fx2, + mkexpr(t3), + mkexpr(t3))), + binop(Iop_OrV128, + binop(Iop_AndV128, + binop(Iop_CmpEQ64Fx2, + mkexpr(t1), + mkexpr(t2)), + binop(Iop_OrV128, + getWReg(ws), + getWReg(wt))), + binop(Iop_OrV128, + binop(Iop_AndV128, + binop(Iop_OrV128, + binop(Iop_CmpUN64Fx2, + mkexpr(t1), + mkexpr(t1)), + binop(Iop_CmpLT64Fx2, + mkexpr(t3), + mkexpr(t1))), + getWReg(wt)), + binop(Iop_AndV128, + binop(Iop_OrV128, + binop(Iop_CmpUN64Fx2, + mkexpr(t2), + mkexpr(t2)), + binop(Iop_CmpLT64Fx2, + mkexpr(t3), + mkexpr(t2))), + getWReg(ws))))), + binop(Iop_64HLtoV128, + mkU64(0x8000000000000000), + mkU64(0x8000000000000000)))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t3), mkexpr(t4))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x0E: { /* FMAX.df */ + switch (df) { + case 0x00: { /* FMAX.W */ + DIP("FMAX.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FMAXW, 2); + putWReg(wd, + binop(Iop_Max32Fx4, + getWReg(ws), + getWReg(wt))); + break; + } + + case 0x01: { /* FMAX.D */ + DIP("FMAX.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FMAXD, 2); + putWReg(wd, + binop(Iop_Max64Fx2, + getWReg(ws), + getWReg(wt))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x0F: { /* FMAX_A.df */ + switch (df) { + case 0x00: { /* FMAX_A.W */ + DIP("FMAX_A.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FMAXAW, 2); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + assign(t1, + binop(Iop_AndV128, + getWReg(ws), + binop(Iop_64HLtoV128, + mkU64(0x7FFFFFFF7FFFFFFF), + mkU64(0x7FFFFFFF7FFFFFFF)))); + assign(t2, + binop(Iop_AndV128, + getWReg(wt), + binop(Iop_64HLtoV128, + mkU64(0x7FFFFFFF7FFFFFFF), + mkU64(0x7FFFFFFF7FFFFFFF)))); + assign(t3, + binop(Iop_Max32Fx4, + mkexpr(t2), mkexpr(t1))); + assign(t4, + binop(Iop_AndV128, + binop(Iop_AndV128, + unop(Iop_NotV128, + binop(Iop_CmpUN32Fx4, + mkexpr(t3), + mkexpr(t3))), + binop(Iop_OrV128, + binop(Iop_AndV128, + binop(Iop_CmpEQ32Fx4, + mkexpr(t1), + mkexpr(t2)), + binop(Iop_AndV128, + getWReg(ws), + getWReg(wt))), + binop(Iop_OrV128, + binop(Iop_AndV128, + binop(Iop_OrV128, + binop(Iop_CmpUN32Fx4, + mkexpr(t1), + mkexpr(t1)), + binop(Iop_CmpLT32Fx4, + mkexpr(t1), + mkexpr(t3))), + getWReg(wt)), + binop(Iop_AndV128, + binop(Iop_OrV128, + binop(Iop_CmpUN32Fx4, + mkexpr(t2), + mkexpr(t2)), + binop(Iop_CmpLT32Fx4, + mkexpr(t2), + mkexpr(t3))), + getWReg(ws))))), + binop(Iop_64HLtoV128, + mkU64(0x8000000080000000), + mkU64(0x8000000080000000)))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t3), mkexpr(t4))); + break; + } + + case 0x01: { /* FMAX_A.D */ + DIP("FMAX_A.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FMAXAD, 2); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 =
newTemp(Ity_V128); + assign(t1, + binop(Iop_AndV128, + getWReg(ws), + binop(Iop_64HLtoV128, + mkU64(0x7FFFFFFFFFFFFFFF), + mkU64(0x7FFFFFFFFFFFFFFF)))); + assign(t2, + binop(Iop_AndV128, + getWReg(wt), + binop(Iop_64HLtoV128, + mkU64(0x7FFFFFFFFFFFFFFF), + mkU64(0x7FFFFFFFFFFFFFFF)))); + assign(t3, + binop(Iop_Max64Fx2, + mkexpr(t2), mkexpr(t1))); + assign(t4, + binop(Iop_AndV128, + binop(Iop_AndV128, + unop(Iop_NotV128, + binop(Iop_CmpUN64Fx2, + mkexpr(t3), + mkexpr(t3))), + binop(Iop_OrV128, + binop(Iop_AndV128, + binop(Iop_CmpEQ64Fx2, + mkexpr(t1), + mkexpr(t2)), + binop(Iop_AndV128, + getWReg(ws), + getWReg(wt))), + binop(Iop_OrV128, + binop(Iop_AndV128, + binop(Iop_OrV128, + binop(Iop_CmpUN64Fx2, + mkexpr(t1), + mkexpr(t1)), + binop(Iop_CmpLT64Fx2, + mkexpr(t1), + mkexpr(t3))), + getWReg(wt)), + binop(Iop_AndV128, + binop(Iop_OrV128, + binop(Iop_CmpUN64Fx2, + mkexpr(t2), + mkexpr(t2)), + binop(Iop_CmpLT64Fx2, + mkexpr(t2), + mkexpr(t3))), + getWReg(ws))))), + binop(Iop_64HLtoV128, + mkU64(0x8000000000000000), + mkU64(0x8000000000000000)))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t3), mkexpr(t4))); + break; + } + + default: + return -1; + } + + break; + } + + default: + return -1; + } + + return 0; +} + +static Int msa_3R_1C(UInt cins, UChar wd, UChar ws) { /* 3R (0x1C) */ + IRTemp t1, t2, t3, t4, t5, t6; + UShort operation; + UChar df, wt; + + operation = (cins & 0x03C00000) >> 22; + df = (cins & 0x00200000) >> 21; + wt = (cins & 0x001F0000) >> 16; + + switch (operation) { + case 0x01: { /* FCOR.df */ + switch (df) { + case 0x00: { /* FCOR.W */ + DIP("FCOR.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCORW, 2); + putWReg(wd, + unop(Iop_NotV128, + binop(Iop_CmpUN32Fx4, + getWReg(ws), + getWReg(wt)))); + break; + } + + case 0x01: { /* FCOR.D */ + DIP("FCOR.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCORD, 2); + putWReg(wd, + unop(Iop_NotV128, + binop(Iop_CmpUN64Fx2, + getWReg(ws), + getWReg(wt)))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x02: { /* FCUNE.df */ + switch (df) { + case 0x00: { /* FCUNE.W */ + DIP("FCUNE.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCUNEW, 2); + putWReg(wd, + unop(Iop_NotV128, + binop(Iop_CmpEQ32Fx4, + getWReg(ws), + getWReg(wt)))); + break; + } + + case 0x01: { /* FCUNE.D */ + DIP("FCUNE.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCUNED, 2); + putWReg(wd, + unop(Iop_NotV128, + binop(Iop_CmpEQ64Fx2, + getWReg(ws), + getWReg(wt)))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x03: { /* FCNE.df */ + switch (df) { + case 0x00: { /* FCNE.W */ + DIP("FCNE.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCNEW, 2); + putWReg(wd, + binop(Iop_XorV128, + unop(Iop_NotV128, + binop(Iop_CmpEQ32Fx4, + getWReg(ws), + getWReg(wt))), + binop(Iop_CmpUN32Fx4, + getWReg(ws), + getWReg(wt)))); + break; + } + + case 0x01: { /* FCNE.D */ + DIP("FCNE.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FCNED, 2); + putWReg(wd, + binop(Iop_XorV128, + unop(Iop_NotV128, + binop(Iop_CmpEQ64Fx2, + getWReg(ws), + getWReg(wt))), + binop(Iop_CmpUN64Fx2, + getWReg(ws), + getWReg(wt)))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x04: { /* MUL_Q.df */ + switch (df) { + case 0x00: { /* MUL_Q.H */ + DIP("MUL_Q.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_QDMulHi16Sx8, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, 
mkexpr(t3)); + break; + } + + case 0x01: { /* MUL_Q.W */ + DIP("MUL_Q.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + binop(Iop_QDMulHi32Sx4, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x05: { /* MADD_Q.df */ + switch (df) { + case 0x00: { /* MADD_Q.H */ + DIP("MADD_Q.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + t5 = newTemp(Ity_V128); + t6 = newTemp(Ity_V128); + assign(t1, // even + binop(Iop_SarN32x4, + binop(Iop_InterleaveEvenLanes16x8, + getWReg(ws), + getWReg(ws)), + mkU8(16))); + assign(t2, // odd + binop(Iop_SarN32x4, + getWReg(ws), mkU8(16))); + assign(t3, // even + binop(Iop_SarN32x4, + binop(Iop_InterleaveEvenLanes16x8, + getWReg(wt), + getWReg(wt)), + mkU8(16))); + assign(t4, // odd + binop(Iop_SarN32x4, + getWReg(wt), mkU8(16))); + assign(t5, + binop(Iop_Add32x4, + binop(Iop_ShlN32x4, + binop(Iop_SarN32x4, + binop(Iop_InterleaveEvenLanes16x8, + getWReg(wd), + getWReg(wd)), + mkU8(16)), + mkU8(15)), + binop(Iop_Mul32x4, + mkexpr(t1), mkexpr(t3)))); + assign(t6, + binop(Iop_Add32x4, + binop(Iop_ShlN32x4, + binop(Iop_SarN32x4, + getWReg(wd), + mkU8(16)), + mkU8(15)), + binop(Iop_Mul32x4, + mkexpr(t2), mkexpr(t4)))); + putWReg(wd, + binop(Iop_InterleaveEvenLanes16x8, + binop(Iop_QandQSarNnarrow32Sto16Sx4, + mkexpr(t6), mkU8(15)), + binop(Iop_QandQSarNnarrow32Sto16Sx4, + mkexpr(t5), mkU8(15)))); + break; + } + + case 0x01: { /* MADD_Q.W */ + DIP("MADD_Q.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + t5 = newTemp(Ity_V128); + t6 = newTemp(Ity_V128); + assign(t1, // even + binop(Iop_SarN64x2, + binop(Iop_InterleaveEvenLanes32x4, + getWReg(ws), + getWReg(ws)), + mkU8(32))); + assign(t2, // odd + binop(Iop_SarN64x2, + getWReg(ws), mkU8(32))); + assign(t3, // even + binop(Iop_SarN64x2, + binop(Iop_InterleaveEvenLanes32x4, + getWReg(wt), + getWReg(wt)), + mkU8(32))); + assign(t4, // odd + binop(Iop_SarN64x2, + getWReg(wt), mkU8(32))); + assign(t5, + binop(Iop_Add64x2, + binop(Iop_ShlN64x2, + binop(Iop_SarN64x2, + binop(Iop_InterleaveEvenLanes32x4, + getWReg(wd), + getWReg(wd)), + mkU8(32)), + mkU8(31)), + binop(Iop_64HLtoV128, + binop(Iop_Mul64, + unop(Iop_V128HIto64, + mkexpr(t1)), + unop(Iop_V128HIto64, + mkexpr(t3))), + binop(Iop_Mul64, + unop(Iop_V128to64, + mkexpr(t1)), + unop(Iop_V128to64, + mkexpr(t3)))))); + assign(t6, + binop(Iop_Add64x2, + binop(Iop_ShlN64x2, + binop(Iop_SarN64x2, + getWReg(wd), + mkU8(32)), + mkU8(31)), + binop(Iop_64HLtoV128, + binop(Iop_Mul64, + unop(Iop_V128HIto64, + mkexpr(t2)), + unop(Iop_V128HIto64, + mkexpr(t4))), + binop(Iop_Mul64, + unop(Iop_V128to64, + mkexpr(t2)), + unop(Iop_V128to64, + mkexpr(t4)))))); + putWReg(wd, + binop(Iop_InterleaveEvenLanes32x4, + binop(Iop_QandQSarNnarrow64Sto32Sx2, + mkexpr(t6), mkU8(31)), + binop(Iop_QandQSarNnarrow64Sto32Sx2, + mkexpr(t5), mkU8(31)))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x06: { /* MSUB_Q.df */ + switch (df) { + case 0x00: { /* MSUB_Q.H */ + DIP("MSUB_Q.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + t5 = newTemp(Ity_V128); + t6 = newTemp(Ity_V128); + assign(t1, // even + binop(Iop_SarN32x4, +
binop(Iop_InterleaveEvenLanes16x8, + getWReg(ws), + getWReg(ws)), + mkU8(16))); + assign(t2, // odd + binop(Iop_SarN32x4, + getWReg(ws), mkU8(16))); + assign(t3, // even + binop(Iop_SarN32x4, + binop(Iop_InterleaveEvenLanes16x8, + getWReg(wt), + getWReg(wt)), + mkU8(16))); + assign(t4, // odd + binop(Iop_SarN32x4, + getWReg(wt), mkU8(16))); + assign(t5, + binop(Iop_Sub32x4, + binop(Iop_ShlN32x4, + binop(Iop_SarN32x4, + binop(Iop_InterleaveEvenLanes16x8, + getWReg(wd), + getWReg(wd)), + mkU8(16)), + mkU8(15)), + binop(Iop_Mul32x4, + mkexpr(t1), mkexpr(t3)))); + assign(t6, + binop(Iop_Sub32x4, + binop(Iop_ShlN32x4, + binop(Iop_SarN32x4, + getWReg(wd), + mkU8(16)), + mkU8(15)), + binop(Iop_Mul32x4, + mkexpr(t2), mkexpr(t4)))); + putWReg(wd, + binop(Iop_InterleaveEvenLanes16x8, + binop(Iop_QandQSarNnarrow32Sto16Sx4, + mkexpr(t6), mkU8(15)), + binop(Iop_QandQSarNnarrow32Sto16Sx4, + mkexpr(t5), mkU8(15)))); + break; + } + + case 0x01: { /* MSUB_Q.W */ + DIP("MSUB_Q.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + t5 = newTemp(Ity_V128); + t6 = newTemp(Ity_V128); + assign(t1, // even + binop(Iop_SarN64x2, + binop(Iop_InterleaveEvenLanes32x4, + getWReg(ws), + getWReg(ws)), + mkU8(32))); + assign(t2, // odd + binop(Iop_SarN64x2, + getWReg(ws), mkU8(32))); + assign(t3, // even + binop(Iop_SarN64x2, + binop(Iop_InterleaveEvenLanes32x4, + getWReg(wt), + getWReg(wt)), + mkU8(32))); + assign(t4, // odd + binop(Iop_SarN64x2, + getWReg(wt), mkU8(32))); + assign(t5, + binop(Iop_Sub64x2, + binop(Iop_ShlN64x2, + binop(Iop_SarN64x2, + binop(Iop_InterleaveEvenLanes32x4, + getWReg(wd), + getWReg(wd)), + mkU8(32)), + mkU8(31)), + binop(Iop_64HLtoV128, + binop(Iop_Mul64, + unop(Iop_V128HIto64, + mkexpr(t1)), + unop(Iop_V128HIto64, + mkexpr(t3))), + binop(Iop_Mul64, + unop(Iop_V128to64, + mkexpr(t1)), + unop(Iop_V128to64, + mkexpr(t3)))))); + assign(t6, + binop(Iop_Sub64x2, + binop(Iop_ShlN64x2, + binop(Iop_SarN64x2, + getWReg(wd), + mkU8(32)), + mkU8(31)), + binop(Iop_64HLtoV128, + binop(Iop_Mul64, + unop(Iop_V128HIto64, + mkexpr(t2)), + unop(Iop_V128HIto64, + mkexpr(t4))), + binop(Iop_Mul64, + unop(Iop_V128to64, + mkexpr(t2)), + unop(Iop_V128to64, + mkexpr(t4)))))); + putWReg(wd, + binop(Iop_InterleaveEvenLanes32x4, + binop(Iop_QandQSarNnarrow64Sto32Sx2, + mkexpr(t6), mkU8(31)), + binop(Iop_QandQSarNnarrow64Sto32Sx2, + mkexpr(t5), mkU8(31)))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x09: { /* FSOR.df */ + switch (df) { + case 0x00: { /* FSOR.W */ + DIP("FSOR.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSORW, 2); + putWReg(wd, + unop(Iop_NotV128, + binop(Iop_CmpUN32Fx4, + getWReg(ws), + getWReg(wt)))); + break; + } + + case 0x01: { /* FSOR.D */ + DIP("FSOR.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSORD, 2); + putWReg(wd, + unop(Iop_NotV128, + binop(Iop_CmpUN64Fx2, + getWReg(ws), + getWReg(wt)))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x0A: { /* FSUNE.df */ + switch (df) { + case 0x00: { /* FSUNE.W */ + DIP("FSUNE.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSUNEW, 2); + putWReg(wd, + unop(Iop_NotV128, + binop(Iop_CmpEQ32Fx4, + getWReg(ws), + getWReg(wt)))); + break; + } + + case 0x01: { /* FSUNE.D */ + DIP("FSUNE.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSUNED, 2); + putWReg(wd, + unop(Iop_NotV128, + binop(Iop_CmpEQ64Fx2, + getWReg(ws), + getWReg(wt)))); + break; + } + + default: + return -1; + } + + break; + } + 
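+ /* FSNE: ordered not-equal. NOT(CmpEQ) alone would be true for unordered lanes, so the XOR with the CmpUN mask clears lanes where either operand is a NaN. */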
+ case 0x0B: { /* FSNE.df */ + switch (df) { + case 0x00: { /* FSNE.W */ + DIP("FSNE.W w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSNEW, 2); + putWReg(wd, + binop(Iop_XorV128, + unop(Iop_NotV128, + binop(Iop_CmpEQ32Fx4, + getWReg(ws), + getWReg(wt))), + binop(Iop_CmpUN32Fx4, + getWReg(ws), + getWReg(wt)))); + break; + } + + case 0x01: { /* FSNE.D */ + DIP("FSNE.D w%d, w%d, w%d", wd, ws, wt); + calculateMSACSR(ws, wt, FSNED, 2); + putWReg(wd, + binop(Iop_XorV128, + unop(Iop_NotV128, + binop(Iop_CmpEQ64Fx2, + getWReg(ws), + getWReg(wt))), + binop(Iop_CmpUN64Fx2, + getWReg(ws), + getWReg(wt)))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x0C: { /* MULR_Q.df */ + switch (df) { + case 0x00: { /* MULR_Q.H */ + DIP("MULR_Q.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_QRDMulHi16Sx8, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* MULR_Q.W */ + DIP("MULR_Q.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_QRDMulHi32Sx4, + mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + default: + return -1; + } + + break; + } + + case 0x0D: { /* MADDR_Q.df */ + switch (df) { + case 0x00: { /* MADDR_Q.H */ + DIP("MADDR_Q.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + t5 = newTemp(Ity_V128); + t6 = newTemp(Ity_V128); + assign(t1, // even + binop(Iop_SarN32x4, + binop(Iop_InterleaveEvenLanes16x8, + getWReg(ws), + getWReg(ws)), + mkU8(16))); + assign(t2, // odd + binop(Iop_SarN32x4, + getWReg(ws), mkU8(16))); + assign(t3, // even + binop(Iop_SarN32x4, + binop(Iop_InterleaveEvenLanes16x8, + getWReg(wt), + getWReg(wt)), + mkU8(16))); + assign(t4, // odd + binop(Iop_SarN32x4, + getWReg(wt), mkU8(16))); + assign(t5, + binop(Iop_Add32x4, + binop(Iop_ShlN32x4, + binop(Iop_SarN32x4, + binop(Iop_InterleaveEvenLanes16x8, + getWReg(wd), + getWReg(wd)), + mkU8(16)), + mkU8(15)), + binop(Iop_Mul32x4, + mkexpr(t1), mkexpr(t3)))); + assign(t6, + binop(Iop_Add32x4, + binop(Iop_ShlN32x4, + binop(Iop_SarN32x4, + getWReg(wd), + mkU8(16)), + mkU8(15)), + binop(Iop_Mul32x4, + mkexpr(t2), mkexpr(t4)))); + putWReg(wd, + binop(Iop_InterleaveEvenLanes16x8, + binop(Iop_QandQRSarNnarrow32Sto16Sx4, + mkexpr(t6), mkU8(15)), + binop(Iop_QandQRSarNnarrow32Sto16Sx4, + mkexpr(t5), mkU8(15)))); + break; + } + + case 0x01: { /* MADDR_Q.W */ + DIP("MADDR_Q.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + t5 = newTemp(Ity_V128); + t6 = newTemp(Ity_V128); + assign(t1, // even + binop(Iop_SarN64x2, + binop(Iop_InterleaveEvenLanes32x4, + getWReg(ws), + getWReg(ws)), + mkU8(32))); + assign(t2, // odd + binop(Iop_SarN64x2, + getWReg(ws), mkU8(32))); + assign(t3, // even + binop(Iop_SarN64x2, + binop(Iop_InterleaveEvenLanes32x4, + getWReg(wt), + getWReg(wt)), + mkU8(32))); + assign(t4, // odd + binop(Iop_SarN64x2, + getWReg(wt), mkU8(32))); + assign(t5, + binop(Iop_Add64x2, + binop(Iop_ShlN64x2, + binop(Iop_SarN64x2, + binop(Iop_InterleaveEvenLanes32x4, + getWReg(wd), + getWReg(wd)), + mkU8(32)), + mkU8(31)), + binop(Iop_64HLtoV128, + binop(Iop_Mul64, + unop(Iop_V128HIto64, + mkexpr(t1)), + unop(Iop_V128HIto64, +
mkexpr(t3))), + binop(Iop_Mul64, + unop(Iop_V128to64, + mkexpr(t1)), + unop(Iop_V128to64, + mkexpr(t3)))))); + assign(t6, + binop(Iop_Add64x2, + binop(Iop_ShlN64x2, + binop(Iop_SarN64x2, + getWReg(wd), + mkU8(32)), + mkU8(31)), + binop(Iop_64HLtoV128, + binop(Iop_Mul64, + unop(Iop_V128HIto64, + mkexpr(t2)), + unop(Iop_V128HIto64, + mkexpr(t4))), + binop(Iop_Mul64, + unop(Iop_V128to64, + mkexpr(t2)), + unop(Iop_V128to64, + mkexpr(t4)))))); + putWReg(wd, + binop(Iop_InterleaveEvenLanes32x4, + binop(Iop_QandQRSarNnarrow64Sto32Sx2, + mkexpr(t6), mkU8(31)), + binop(Iop_QandQRSarNnarrow64Sto32Sx2, + mkexpr(t5), mkU8(31)))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x0E: { /* MSUBR_Q.df */ + switch (df) { + case 0x00: { /* MSUBR_Q.H */ + DIP("MSUBR_Q.H w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + t5 = newTemp(Ity_V128); + t6 = newTemp(Ity_V128); + assign(t1, // even + binop(Iop_SarN32x4, + binop(Iop_InterleaveEvenLanes16x8, + getWReg(ws), + getWReg(ws)), + mkU8(16))); + assign(t2, // odd + binop(Iop_SarN32x4, + getWReg(ws), mkU8(16))); + assign(t3, // even + binop(Iop_SarN32x4, + binop(Iop_InterleaveEvenLanes16x8, + getWReg(wt), + getWReg(wt)), + mkU8(16))); + assign(t4, // odd + binop(Iop_SarN32x4, + getWReg(wt), mkU8(16))); + assign(t5, + binop(Iop_Sub32x4, + binop(Iop_ShlN32x4, + binop(Iop_SarN32x4, + binop(Iop_InterleaveEvenLanes16x8, + getWReg(wd), + getWReg(wd)), + mkU8(16)), + mkU8(15)), + binop(Iop_Mul32x4, + mkexpr(t1), mkexpr(t3)))); + assign(t6, + binop(Iop_Sub32x4, + binop(Iop_ShlN32x4, + binop(Iop_SarN32x4, + getWReg(wd), + mkU8(16)), + mkU8(15)), + binop(Iop_Mul32x4, + mkexpr(t2), mkexpr(t4)))); + putWReg(wd, + binop(Iop_InterleaveEvenLanes16x8, + binop(Iop_QandQRSarNnarrow32Sto16Sx4, + mkexpr(t6), mkU8(15)), + binop(Iop_QandQRSarNnarrow32Sto16Sx4, + mkexpr(t5), mkU8(15)))); + break; + } + + case 0x01: { /* MSUBR_Q.W */ + DIP("MSUBR_Q.W w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + t5 = newTemp(Ity_V128); + t6 = newTemp(Ity_V128); + assign(t1, // even + binop(Iop_SarN64x2, + binop(Iop_InterleaveEvenLanes32x4, + getWReg(ws), + getWReg(ws)), + mkU8(32))); + assign(t2, // odd + binop(Iop_SarN64x2, + getWReg(ws), mkU8(32))); + assign(t3, // even + binop(Iop_SarN64x2, + binop(Iop_InterleaveEvenLanes32x4, + getWReg(wt), + getWReg(wt)), + mkU8(32))); + assign(t4, // odd + binop(Iop_SarN64x2, + getWReg(wt), mkU8(32))); + assign(t5, + binop(Iop_Sub64x2, + binop(Iop_ShlN64x2, + binop(Iop_SarN64x2, + binop(Iop_InterleaveEvenLanes32x4, + getWReg(wd), + getWReg(wd)), + mkU8(32)), + mkU8(31)), + binop(Iop_64HLtoV128, + binop(Iop_Mul64, + unop(Iop_V128HIto64, + mkexpr(t1)), + unop(Iop_V128HIto64, + mkexpr(t3))), + binop(Iop_Mul64, + unop(Iop_V128to64, + mkexpr(t1)), + unop(Iop_V128to64, + mkexpr(t3)))))); + assign(t6, + binop(Iop_Sub64x2, + binop(Iop_ShlN64x2, + binop(Iop_SarN64x2, + getWReg(wd), + mkU8(32)), + mkU8(31)), + binop(Iop_64HLtoV128, + binop(Iop_Mul64, + unop(Iop_V128HIto64, + mkexpr(t2)), + unop(Iop_V128HIto64, + mkexpr(t4))), + binop(Iop_Mul64, + unop(Iop_V128to64, + mkexpr(t2)), + unop(Iop_V128to64, + mkexpr(t4)))))); + putWReg(wd, + binop(Iop_InterleaveEvenLanes32x4, + binop(Iop_QandQRSarNnarrow64Sto32Sx2, + mkexpr(t6), mkU8(31)), + binop(Iop_QandQRSarNnarrow64Sto32Sx2, + mkexpr(t5), mkU8(31)))); + break; + } + + default: + return -1; + } + + break; + } + + default: + return -1; +
} + + return 0; +} + +static Int msa_ELM(UInt cins, UChar wd, UChar ws) { /* ELM (0x19) */ + IRTemp t1, t2, t3, t4, t5; + IRType ty; + UShort operation; + UChar df, n; + + operation = (cins & 0x03C00000) >> 22; + ty = mode64 ? Ity_I64 : Ity_I32; + + switch ((cins & 0x03FF0000) >> 16) { + case 0x07E: /* CFCMSA */ + DIP("CFCMSA r%d, c%d", wd, ws); + + switch (ws) { + case 0: { /* MSAIR */ + IRDirty *d; + t1 = newTemp(Ity_I32); + /* IRExpr_BBPTR() => + Need to pass pointer to + guest state to helper. */ + d = unsafeIRDirty_1_N(t1, 0, + "mips_dirtyhelper_get_MSAIR", + &mips_dirtyhelper_get_MSAIR, + mkIRExprVec_0()); + /* d->nFxState = 0; */ + stmt(IRStmt_Dirty(d)); + putIReg(wd, + mkWidenFrom32(ty, mkexpr(t1), True)); + break; + } + + case 1: /* MSACSR */ + putIReg(wd, + mkWidenFrom32(ty, getMSACSR(), True)); + break; + + default: + putIReg(wd, + mkWidenFrom32(ty, mkU32(0), False)); + break; + } + + break; + + case 0x03E: /* CTCMSA */ + DIP("CTCMSA r%d, c%d", ws, wd); + + if (wd == 1) { /* MSACSR */ + putMSACSR( + binop(Iop_And32, mkNarrowTo32(ty, getIReg(ws)), + mkU32(0x1FFFFFF))); + } + + break; + + case 0x0BE: /* MOVE.V */ + DIP("MOVE.V w%d, w%d", ws, wd); + putWReg(wd, getWReg(ws)); + break; + + default: + df = (cins & 0x003F0000) >> 16; + if ((df & 0x38) == 0x38) { // 11100n; dw + n = df & 0x01; + df = 0x38; + } else if ((df & 0x30) == 0x30) { // 1100nn; w + n = df & 0x03; + df = 0x30; + } else if ((df & 0x20) == 0x20) { // 100nnn; hw + n = df & 0x07; + df = 0x20; + } else if ((df & 0x00) == 0x00) { // 00nnnn; b + n = df & 0x0F; + df = 0x00; + } + + switch (operation) { + case 0x00: /* SLDI.df */ + switch (df) { + case 0x00: /* SLDI.B */ + DIP("SLDI.B w%d, w%d[%d]", wd, ws, n); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, + binop(Iop_ShrV128, + getWReg(ws), + mkU8(n << 3))); + assign(t2, + binop(Iop_ShlV128, + getWReg(wd), + mkU8(n ? 
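+ /* wd supplies the top (16 - n) bytes (the count is in bits); presumably to
+    avoid an invalid full 128-bit shift, n == 0 maps to a shift of 0 here */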
+ (16 - n) << 3 : 0))); + putWReg(wd, + binop(Iop_OrV128, mkexpr(t1), mkexpr(t2))); + break; + + case 0x20: /* SLDI.H */ + DIP("SLDI.H w%d, w%d[%d]", wd, ws, n); + + if (n == 0) { + putWReg(wd, getWReg(ws)); + } else { + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, + binop(Iop_ShrN64x2, + getWReg(ws), + mkU8(n << 3))); + assign(t2, + binop(Iop_ShlN64x2, + getWReg(wd), + mkU8((8 - n) << 3))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t1), + mkexpr(t2))); + } + + break; + + case 0x30: /* SLDI.W */ + DIP("SLDI.W w%d, w%d[%d]", wd, ws, n); + + if (n == 0) { + putWReg(wd, getWReg(ws)); + } else { + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, + binop(Iop_ShrN32x4, + getWReg(ws), + mkU8(n << 3))); + assign(t2, + binop(Iop_ShlN32x4, + getWReg(wd), + mkU8((4 - n) << 3))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t1), + mkexpr(t2))); + } + + break; + + case 0x38: /* SLDI.D */ + DIP("SLDI.D w%d, w%d[%d]", wd, ws, n); + + if (n == 0) { + putWReg(wd, getWReg(ws)); + } else { + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, + binop(Iop_ShrN16x8, + getWReg(ws), + mkU8(n << 3))); + assign(t2, + binop(Iop_ShlN16x8, + getWReg(wd), + mkU8((2 - n) << 3))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t1), + mkexpr(t2))); + } + + break; + + default: + return -1; + } + + break; + + case 0x01: /* SPLATI.df */ + switch (df) { + case 0x00: { /* SPLATI.B */ + DIP("SPLATI.B w%d, w%d[%d]", wd, ws, n); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + + if (n & 1) + assign(t1, + binop(Iop_InterleaveOddLanes8x16, + getWReg(ws), + getWReg(ws))); + else + assign(t1, + binop(Iop_InterleaveEvenLanes8x16, + getWReg(ws), + getWReg(ws))); + + n /= 2; + + if (n & 1) + assign(t2, + binop(Iop_InterleaveOddLanes16x8, + mkexpr(t1), mkexpr(t1))); + else + assign(t2, + binop(Iop_InterleaveEvenLanes16x8, + mkexpr(t1), mkexpr(t1))); + + n /= 2; + + if (n & 1) + assign(t3, + binop(Iop_InterleaveOddLanes32x4, + mkexpr(t2), mkexpr(t2))); + else + assign(t3, + binop(Iop_InterleaveEvenLanes32x4, + mkexpr(t2), mkexpr(t2))); + + n /= 2; + + if (n & 1) + assign(t4, + binop(Iop_InterleaveHI64x2, + mkexpr(t3), mkexpr(t3))); + else + assign(t4, + binop(Iop_InterleaveLO64x2, + mkexpr(t3), mkexpr(t3))); + + putWReg(wd, mkexpr(t4)); + break; + } + + case 0x20: { /* SPLATI.H */ + DIP("SPLATI.H w%d, w%d[%d]", wd, ws, n); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + + if (n & 1) + assign(t1, + binop(Iop_InterleaveOddLanes16x8, + getWReg(ws), + getWReg(ws))); + else + assign(t1, + binop(Iop_InterleaveEvenLanes16x8, + getWReg(ws), + getWReg(ws))); + + n /= 2; + + if (n & 1) + assign(t2, + binop(Iop_InterleaveOddLanes32x4, + mkexpr(t1), mkexpr(t1))); + else + assign(t2, + binop(Iop_InterleaveEvenLanes32x4, + mkexpr(t1), mkexpr(t1))); + + n /= 2; + + if (n & 1) + assign(t3, + binop(Iop_InterleaveHI64x2, + mkexpr(t2), mkexpr(t2))); + else + assign(t3, + binop(Iop_InterleaveLO64x2, + mkexpr(t2), mkexpr(t2))); + + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x30: { /* SPLATI.W */ + DIP("SPLATI.W w%d, w%d[%d]", wd, ws, n); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + + if (n & 1) + assign(t2, + binop(Iop_InterleaveOddLanes32x4, + mkexpr(t1), mkexpr(t1))); + else + assign(t2, + binop(Iop_InterleaveEvenLanes32x4, + mkexpr(t1), mkexpr(t1))); + + n /= 2; + + if (n & 1) + assign(t3, + binop(Iop_InterleaveHI64x2, + mkexpr(t2), mkexpr(t2))); + else + 
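+ /* as in the other SPLATI cases: every interleave step halves the candidate
+    lanes, one bit of n choosing the odd/even (or high/low) half each time */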
assign(t3, + binop(Iop_InterleaveLO64x2, + mkexpr(t2), mkexpr(t2))); + + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x38: /* SPLATI.D */ + DIP("SPLATI.D w%d, w%d[%d]", wd, ws, n); + t1 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + + if (n) + assign(t3, + binop(Iop_InterleaveHI64x2, + mkexpr(t1), mkexpr(t1))); + else + assign(t3, + binop(Iop_InterleaveLO64x2, + mkexpr(t1), mkexpr(t1))); + + putWReg(wd, mkexpr(t3)); + break; + + default: + return -1; + } + + break; + + case 0x02: /* COPY_S.df */ + switch (df) { + case 0x00: /* COPY_S.B */ + DIP("COPY_S.B r%d, w%d[%d]", wd, ws, n); + t1 = newTemp(Ity_I8); + + switch (n) { + case 0: + assign(t1, + unop(Iop_32to8, + unop(Iop_V128to32, + getWReg(ws)))); + break; + + case 1: + assign(t1, + unop(Iop_16HIto8, + unop(Iop_32to16, + unop(Iop_V128to32, + getWReg(ws))))); + break; + + case 2: + assign(t1, + unop(Iop_16to8, + unop(Iop_32HIto16, + unop(Iop_64to32, + unop(Iop_V128to64, + getWReg(ws)))))); + break; + + case 3: + assign(t1, + unop(Iop_16HIto8, + unop(Iop_32HIto16, + unop(Iop_64to32, + unop(Iop_V128to64, + getWReg(ws)))))); + break; + + case 4: + assign(t1, + unop(Iop_16to8, + unop(Iop_32to16, + unop(Iop_64HIto32, + unop(Iop_V128to64, + getWReg(ws)))))); + break; + + case 5: + assign(t1, + unop(Iop_16HIto8, + unop(Iop_32to16, + unop(Iop_64HIto32, + unop(Iop_V128to64, + getWReg(ws)))))); + break; + + case 6: + assign(t1, + unop(Iop_16to8, + unop(Iop_32HIto16, + unop(Iop_64HIto32, + unop(Iop_V128to64, + getWReg(ws)))))); + break; + + case 7: + assign(t1, + unop(Iop_16HIto8, + unop(Iop_32HIto16, + unop(Iop_64HIto32, + unop(Iop_V128to64, + getWReg(ws)))))); + break; + + case 8: + assign(t1, + unop(Iop_16to8, + unop(Iop_32to16, + unop(Iop_64to32, + unop(Iop_V128HIto64, + getWReg(ws)))))); + break; + + case 9: + assign(t1, + unop(Iop_16HIto8, + unop(Iop_32to16, + unop(Iop_64to32, + unop(Iop_V128HIto64, + getWReg(ws)))))); + break; + + case 10: + assign(t1, + unop(Iop_16to8, + unop(Iop_32HIto16, + unop(Iop_64to32, + unop(Iop_V128HIto64, + getWReg(ws)))))); + break; + + case 11: + assign(t1, + unop(Iop_16HIto8, + unop(Iop_32HIto16, + unop(Iop_64to32, + unop(Iop_V128HIto64, + getWReg(ws)))))); + break; + + case 12: + assign(t1, + unop(Iop_16to8, + unop(Iop_32to16, + unop(Iop_64HIto32, + unop(Iop_V128HIto64, + getWReg(ws)))))); + break; + + case 13: + assign(t1, + unop(Iop_16HIto8, + unop(Iop_32to16, + unop(Iop_64HIto32, + unop(Iop_V128HIto64, + getWReg(ws)))))); + break; + + case 14: + assign(t1, + unop(Iop_16to8, + unop(Iop_32HIto16, + unop(Iop_64HIto32, + unop(Iop_V128HIto64, + getWReg(ws)))))); + break; + + case 15: + assign(t1, + unop(Iop_16HIto8, + unop(Iop_32HIto16, + unop(Iop_64HIto32, + unop(Iop_V128HIto64, + getWReg(ws)))))); + break; + } + + putIReg(wd, + unop(mode64 ? 
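+ /* COPY_S sign-extends the selected lane into the integer register */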
Iop_8Sto64 : Iop_8Sto32, + mkexpr(t1))); + break; + + case 0x20: /* COPY_S.H */ + DIP("COPY_S.H r%d, w%d[%d]", wd, ws, n); + t1 = newTemp(Ity_I16); + + switch (n) { + case 0: + assign(t1, + unop(Iop_32to16, + unop(Iop_64to32, + unop(Iop_V128to64, + getWReg(ws))))); + break; + + case 1: + assign(t1, + unop(Iop_32HIto16, + unop(Iop_64to32, + unop(Iop_V128to64, + getWReg(ws))))); + break; + + case 2: + assign(t1, + unop(Iop_32to16, + unop(Iop_64HIto32, + unop(Iop_V128to64, + getWReg(ws))))); + break; + + case 3: + assign(t1, + unop(Iop_32HIto16, + unop(Iop_64HIto32, + unop(Iop_V128to64, + getWReg(ws))))); + break; + + case 4: + assign(t1, + unop(Iop_32to16, + unop(Iop_64to32, + unop(Iop_V128HIto64, + getWReg(ws))))); + break; + + case 5: + assign(t1, + unop(Iop_32HIto16, + unop(Iop_64to32, + unop(Iop_V128HIto64, + getWReg(ws))))); + break; + + case 6: + assign(t1, + unop(Iop_32to16, + unop(Iop_64HIto32, + unop(Iop_V128HIto64, + getWReg(ws))))); + break; + + case 7: + assign(t1, + unop(Iop_32HIto16, + unop(Iop_64HIto32, + unop(Iop_V128HIto64, + getWReg(ws))))); + break; + } + + putIReg(wd, + unop(mode64 ? Iop_16Sto64 : Iop_16Sto32, + mkexpr(t1))); + break; + + case 0x30: /* COPY_S.W */ + DIP("COPY_S.W r%d, w%d[%d]", wd, ws, n); + + switch (n) { + case 0: + putIReg(wd, + mkWidenFrom32(ty, + unop(Iop_V128to32, + getWReg(ws)), + True)); + break; + + case 1: + t2 = newTemp(Ity_I64); + assign(t2, + unop(Iop_V128to64, getWReg(ws))); + putIReg(wd, + mkWidenFrom32(ty, + unop(Iop_64HIto32, + mkexpr(t2)), + True)); + break; + + case 2: + t2 = newTemp(Ity_I64); + assign(t2, + unop(Iop_V128HIto64, + getWReg(ws))); + putIReg(wd, + mkWidenFrom32(ty, + unop(Iop_64to32, + mkexpr(t2)), + True)); + break; + + case 3: + t2 = newTemp(Ity_I64); + assign(t2, + unop(Iop_V128HIto64, + getWReg(ws))); + putIReg(wd, + mkWidenFrom32(ty, + unop(Iop_64HIto32, + mkexpr(t2)), + True)); + break; + + default: + break; + } + + break; + + case 0x38: /* COPY_S.D */ + if (mode64) { + DIP("COPY_S.D r%d, w%d[%d]", wd, ws, n); + + switch (n) { + case 0: + putIReg(wd, + unop(Iop_V128to64, + getWReg(ws))); + break; + + case 1: + putIReg(wd, + unop(Iop_V128HIto64, + getWReg(ws))); + break; + } + } else { + return -2; + } + + break; + + default: + return -1; + } + + break; + + case 0x03: { /* COPY_U.df */ + switch (df) { + case 0x00: /* COPY_U.B */ + DIP("COPY_U.B r%d, w%d[%d]", wd, ws, n); + t1 = newTemp(Ity_I8); + + switch (n) { + case 0: + assign(t1, + unop(Iop_16to8, + unop(Iop_32to16, + unop(Iop_64to32, + unop(Iop_V128to64, + getWReg(ws)))))); + break; + + case 1: + assign(t1, + unop(Iop_16HIto8, + unop(Iop_32to16, + unop(Iop_64to32, + unop(Iop_V128to64, + getWReg(ws)))))); + break; + + case 2: + assign(t1, + unop(Iop_16to8, + unop(Iop_32HIto16, + unop(Iop_64to32, + unop(Iop_V128to64, + getWReg(ws)))))); + break; + + case 3: + assign(t1, + unop(Iop_16HIto8, + unop(Iop_32HIto16, + unop(Iop_64to32, + unop(Iop_V128to64, + getWReg(ws)))))); + break; + + case 4: + assign(t1, + unop(Iop_16to8, + unop(Iop_32to16, + unop(Iop_64HIto32, + unop(Iop_V128to64, + getWReg(ws)))))); + break; + + case 5: + assign(t1, + unop(Iop_16HIto8, + unop(Iop_32to16, + unop(Iop_64HIto32, + unop(Iop_V128to64, + getWReg(ws)))))); + break; + + case 6: + assign(t1, + unop(Iop_16to8, + unop(Iop_32HIto16, + unop(Iop_64HIto32, + unop(Iop_V128to64, + getWReg(ws)))))); + break; + + case 7: + assign(t1, + unop(Iop_16HIto8, + unop(Iop_32HIto16, + unop(Iop_64HIto32, + unop(Iop_V128to64, + getWReg(ws)))))); + break; + + case 8: + assign(t1, + unop(Iop_16to8, + 
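+ /* lanes 8..15 are extracted from the upper 64 bits of the vector */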
unop(Iop_32to16, + unop(Iop_64to32, + unop(Iop_V128HIto64, + getWReg(ws)))))); + break; + + case 9: + assign(t1, + unop(Iop_16HIto8, + unop(Iop_32to16, + unop(Iop_64to32, + unop(Iop_V128HIto64, + getWReg(ws)))))); + break; + + case 10: + assign(t1, + unop(Iop_16to8, + unop(Iop_32HIto16, + unop(Iop_64to32, + unop(Iop_V128HIto64, + getWReg(ws)))))); + break; + + case 11: + assign(t1, + unop(Iop_16HIto8, + unop(Iop_32HIto16, + unop(Iop_64to32, + unop(Iop_V128HIto64, + getWReg(ws)))))); + break; + + case 12: + assign(t1, + unop(Iop_16to8, + unop(Iop_32to16, + unop(Iop_64HIto32, + unop(Iop_V128HIto64, + getWReg(ws)))))); + break; + + case 13: + assign(t1, + unop(Iop_16HIto8, + unop(Iop_32to16, + unop(Iop_64HIto32, + unop(Iop_V128HIto64, + getWReg(ws)))))); + break; + + case 14: + assign(t1, + unop(Iop_16to8, + unop(Iop_32HIto16, + unop(Iop_64HIto32, + unop(Iop_V128HIto64, + getWReg(ws)))))); + break; + + case 15: + assign(t1, + unop(Iop_16HIto8, + unop(Iop_32HIto16, + unop(Iop_64HIto32, + unop(Iop_V128HIto64, + getWReg(ws)))))); + break; + } + + putIReg(wd, + unop(mode64 ? Iop_8Uto64 : Iop_8Uto32, + mkexpr(t1))); + break; + + case 0x20: /* COPY_U.H */ + DIP("COPY_U.H r%d, w%d[%d]", wd, ws, n); + t1 = newTemp(Ity_I16); + + switch (n) { + case 0: + assign(t1, + unop(Iop_32to16, + unop(Iop_64to32, + unop(Iop_V128to64, + getWReg(ws))))); + break; + + case 1: + assign(t1, + unop(Iop_32HIto16, + unop(Iop_64to32, + unop(Iop_V128to64, + getWReg(ws))))); + break; + + case 2: + assign(t1, + unop(Iop_32to16, + unop(Iop_64HIto32, + unop(Iop_V128to64, + getWReg(ws))))); + break; + + case 3: + assign(t1, + unop(Iop_32HIto16, + unop(Iop_64HIto32, + unop(Iop_V128to64, + getWReg(ws))))); + break; + + case 4: + assign(t1, + unop(Iop_32to16, + unop(Iop_64to32, + unop(Iop_V128HIto64, + getWReg(ws))))); + break; + + case 5: + assign(t1, + unop(Iop_32HIto16, + unop(Iop_64to32, + unop(Iop_V128HIto64, + getWReg(ws))))); + break; + + case 6: + assign(t1, + unop(Iop_32to16, + unop(Iop_64HIto32, + unop(Iop_V128HIto64, + getWReg(ws))))); + break; + + case 7: + assign(t1, + unop(Iop_32HIto16, + unop(Iop_64HIto32, + unop(Iop_V128HIto64, + getWReg(ws))))); + break; + } + + putIReg(wd, + unop(mode64 ? Iop_16Uto64 : Iop_16Uto32, + mkexpr(t1))); + break; + + case 0x30: /* COPY_U.W */ + DIP("COPY_U.W r%d, w%d[%d]", wd, ws, n); + + switch (n) { + case 0: + putIReg(wd, + mkWidenFrom32(ty, + unop(Iop_V128to32, + getWReg(ws)), + False)); + break; + + case 1: + t2 = newTemp(Ity_I64); + assign(t2, + unop(Iop_V128to64, + getWReg(ws))); + putIReg(wd, + mkWidenFrom32(ty, + unop(Iop_64HIto32, + mkexpr(t2)), + False)); + break; + + case 2: + t2 = newTemp(Ity_I64); + assign(t2, + unop(Iop_V128HIto64, + getWReg(ws))); + putIReg(wd, + mkWidenFrom32(ty, + unop(Iop_64to32, + mkexpr(t2)), + False)); + break; + + case 3: + t2 = newTemp(Ity_I64); + assign(t2, + unop(Iop_V128HIto64, + getWReg(ws))); + putIReg(wd, + mkWidenFrom32(ty, + unop(Iop_64HIto32, + mkexpr(t2)), + False)); + break; + + default: + break; + } + + break; + + default: + return -1; + } + + break; + } + + case 0x04: { /* INSERT.df */ + t5 = newTemp(Ity_I64); + UInt hi = 1; + ULong mask; + IRTemp *src, *dst; + assign(t5, mode64 ? 
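+ /* widen the GPR value to 64 bits so the masked-merge path below can serve
+    both 32-bit and 64-bit mode */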
getIReg(ws) : + unop(Iop_32Uto64, getIReg(ws))); + + if (df == 0x38) { /* INSERT.D */ + if (mode64) { + DIP("INSERT.D w%d[%d], r%d", wd, n, ws); + + if (n == 0) { + putWReg(wd, + binop(Iop_64HLtoV128, + unop(Iop_V128HIto64, + getWReg(wd)), + mkexpr(t5))); + } else { + putWReg(wd, + binop(Iop_64HLtoV128, + mkexpr(t5), + unop(Iop_V128to64, + getWReg(wd)))); + } + + break; + } else { + return -2; + } + } else { + t1 = newTemp(Ity_I64); + t2 = newTemp(Ity_I64); + assign(t1, unop(Iop_V128to64, getWReg(wd))); + assign(t2, unop(Iop_V128HIto64, getWReg(wd))); + } + + switch (df) { + case 0x00: /* INSERT.B */ + DIP("INSERT.B w%d[%d], r%d", wd, n, ws); + + if (n >= 8) { + n -= 8; + } else { + hi = 0; + } + + n <<= 3; + mask = 0xFFull; + break; + + case 0x20: /* INSERT.H */ + DIP("INSERT.H w%d[%d], r%d", wd, n, ws); + + if (n >= 4) { + n -= 4; + } else { + hi = 0; + } + + n <<= 4; + mask = 0xFFFFull; + break; + + case 0x30: /* INSERT.W */ + DIP("INSERT.W w%d[%d], r%d", wd, n, ws); + + if (n >= 2) { + n -= 2; + } else { + hi = 0; + } + + n <<= 5; + mask = 0xFFFFFFFFull; + break; + + default: + return -1; + } + + if (hi) { + t4 = newTemp(Ity_I64); + src = &t2; + dst = &t4; + t3 = t1; + } else { + t3 = newTemp(Ity_I64); + src = &t1; + dst = &t3; + t4 = t2; + } + + mask <<= n; + assign(*dst, + binop(Iop_Or64, + binop(Iop_And64, mkexpr(*src), mkU64(~mask)), + binop(Iop_And64, + binop(Iop_Shl64, mkexpr(t5), mkU8(n)), + mkU64(mask)))); + putWReg(wd, + binop(Iop_64HLtoV128, mkexpr(t4), mkexpr(t3))); + break; + } + + case 0x05: { /* INSVE.df */ + switch (df) { + case 0x00: { /* INSVE.B */ + DIP("INSVE.B w%d[%d], w%d[0]", wd, n, ws); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, getWReg(wd)); + assign(t2, getWReg(ws)); + Int i; + IRTemp tmp[16]; + + for (i = 0; i < 16; i++) { + tmp[i] = newTemp(Ity_I8); + + if (n == i) + assign(tmp[i], + binop(Iop_GetElem8x16, + mkexpr(t2), mkU8(0x0))); + else + assign(tmp[i], + binop(Iop_GetElem8x16, + mkexpr(t1), mkU8(i))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_16HLto32, + binop(Iop_8HLto16, + mkexpr(tmp[15]), + mkexpr(tmp[14])), + binop(Iop_8HLto16, + mkexpr(tmp[13]), + mkexpr(tmp[12]))), + binop(Iop_16HLto32, + binop(Iop_8HLto16, + mkexpr(tmp[11]), + mkexpr(tmp[10])), + binop(Iop_8HLto16, + mkexpr(tmp[9]), + mkexpr(tmp[8])))), + binop(Iop_32HLto64, + binop(Iop_16HLto32, + binop(Iop_8HLto16, + mkexpr(tmp[7]), + mkexpr(tmp[6])), + binop(Iop_8HLto16, + mkexpr(tmp[5]), + mkexpr(tmp[4]))), + binop(Iop_16HLto32, + binop(Iop_8HLto16, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_8HLto16, + mkexpr(tmp[1]), + mkexpr(tmp[0])))))); + break; + } + + case 0x20: { /* INSVE.H */ + DIP("INSVE.H w%d[%d], r%d[0]", wd, n, ws); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, getWReg(wd)); + assign(t2, getWReg(ws)); + Int i; + IRTemp tmp[8]; + + for (i = 0; i < 8; i++) { + tmp[i] = newTemp(Ity_I16); + + if (n == i) + assign(tmp[i], + binop(Iop_GetElem16x8, + mkexpr(t2), mkU8(0x0))); + else + assign(tmp[i], + binop(Iop_GetElem16x8, + mkexpr(t1), mkU8(i))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + binop(Iop_16HLto32, + mkexpr(tmp[7]), + mkexpr(tmp[6])), + binop(Iop_16HLto32, + mkexpr(tmp[5]), + mkexpr(tmp[4]))), + binop(Iop_32HLto64, + binop(Iop_16HLto32, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_16HLto32, + mkexpr(tmp[1]), + mkexpr(tmp[0]))))); + break; + } + + case 0x30: { /* INSVE.W */ + DIP("INSVE.W w%d[%d], r%d[0]", wd, n, ws); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); 
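+ /* as in INSVE.B/INSVE.H above: element 0 of ws replaces element n of wd,
+    and all remaining lanes are reassembled unchanged */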
+ assign(t1, getWReg(wd)); + assign(t2, getWReg(ws)); + Int i; + IRTemp tmp[4]; + + for (i = 0; i < 4; i++) { + tmp[i] = newTemp(Ity_I32); + + if (n == i) + assign(tmp[i], + binop(Iop_GetElem32x4, + mkexpr(t2), mkU8(0x0))); + else + assign(tmp[i], + binop(Iop_GetElem32x4, + mkexpr(t1), mkU8(i))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_32HLto64, + mkexpr(tmp[1]), + mkexpr(tmp[0])))); + break; + } + + case 0x38: { /* INSVE.D */ + DIP("INSVE.D w%d[%d], r%d[0]", wd, n, ws); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + assign(t1, getWReg(wd)); + assign(t2, getWReg(ws)); + Int i; + IRTemp tmp[2]; + + for (i = 0; i < 2; i++) { + tmp[i] = newTemp(Ity_I64); + + if (n == i) + assign(tmp[i], + binop(Iop_GetElem64x2, + mkexpr(t2), mkU8(0x0))); + else + assign(tmp[i], + binop(Iop_GetElem64x2, + mkexpr(t1), mkU8(i))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + mkexpr(tmp[1]), mkexpr(tmp[0]))); + break; + } + } + + break; + } + + default: + return -1; + } + } + return 0; +} + +static Int msa_VEC(UInt cins, UChar wd, UChar ws) { /* VEC */ + IRTemp t1, t2, t3; + UShort operation; + UChar wt; + + vassert((cins & 0x03000000) == 0); + + operation = (cins & 0x03E00000) >> 21; + wt = (cins & 0x001F0000) >> 16; + + switch (operation) { + case 0x00: { /* AND.V */ + DIP("AND.V w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_AndV128, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x01: { /* OR.V */ + DIP("OR.V w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_OrV128, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x02: { /* NOR.V */ + DIP("NOR.V w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, + unop(Iop_NotV128, + binop(Iop_OrV128, mkexpr(t1), mkexpr(t2)))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x03: { /* XOR.V */ + DIP("XOR.V w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + assign(t2, getWReg(wt)); + assign(t3, binop(Iop_XorV128, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x04: { /* BMNZ (ws AND wt) OR (wd AND NOT wt) */ + DIP("BMNZ.V w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, + binop(Iop_AndV128, + getWReg(ws), getWReg(wt))); + assign(t2, + binop(Iop_AndV128, + getWReg(wd), + unop(Iop_NotV128, getWReg(wt)))); + assign(t3, binop(Iop_OrV128, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x05: { /* BMZ.V (ws AND NOT wt) OR (wd AND wt) */ + DIP("BMZ.V w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, + binop(Iop_AndV128, + getWReg(wd), getWReg(wt))); + assign(t2, + binop(Iop_AndV128, + getWReg(ws), + unop(Iop_NotV128, getWReg(wt)))); + assign(t3, binop(Iop_OrV128, mkexpr(t1), mkexpr(t2))); + putWReg(wd, mkexpr(t3)); + break; + } + + case 0x06: { /* BSEL (ws AND NOT wd) OR (wt AND wd) */ + DIP("BSEL.V w%d, w%d, w%d", wd, ws, wt); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = 
newTemp(Ity_V128);
+ assign(t1,
+    binop(Iop_AndV128,
+       getWReg(wd), getWReg(wt)));
+ assign(t2,
+    binop(Iop_AndV128,
+       getWReg(ws),
+       unop(Iop_NotV128, getWReg(wd))));
+ assign(t3, binop(Iop_OrV128, mkexpr(t1), mkexpr(t2)));
+ putWReg(wd, mkexpr(t3));
+ break;
+ }
+
+ default:
+ return -1;
+ }
+
+ return 0;
+}
+
+static Int msa_2R(UInt cins, UChar wd, UChar ws) { /* 2R */
+ IRTemp t1, t2, t3, t4;
+ IRType ty;
+ UShort operation;
+ UChar df;
+
+ vassert((cins & 0x00200000) == 0);
+
+ operation = (cins & 0x03FC0000) >> 18;
+ df = (cins & 0x00030000) >> 16;
+ ty = mode64 ? Ity_I64 : Ity_I32;
+
+ switch (operation) {
+ case 0xC0: { /* FILL.df */
+ t1 = newTemp(Ity_I64);
+
+ switch (df) {
+ case 0x00: /* FILL.B */
+ DIP("FILL.B w%d, r%d", wd, ws);
+ t2 = newTemp(Ity_I32);
+ t3 = newTemp(Ity_I16);
+ t4 = newTemp(Ity_I8);
+ assign(t4, mkNarrowTo8(ty, getIReg(ws)));
+ assign(t3,
+    binop(Iop_8HLto16, mkexpr(t4), mkexpr(t4)));
+ assign(t2,
+    binop(Iop_16HLto32, mkexpr(t3), mkexpr(t3)));
+ assign(t1,
+    binop(Iop_32HLto64, mkexpr(t2), mkexpr(t2)));
+ break;
+
+ case 0x01: /* FILL.H */
+ DIP("FILL.H w%d, r%d", wd, ws);
+ t2 = newTemp(Ity_I32);
+ t3 = newTemp(Ity_I16);
+ assign(t3, mkNarrowTo16(ty, getIReg(ws)));
+ assign(t2,
+    binop(Iop_16HLto32, mkexpr(t3), mkexpr(t3)));
+ assign(t1,
+    binop(Iop_32HLto64, mkexpr(t2), mkexpr(t2)));
+ break;
+
+ case 0x02: /* FILL.W */
+ DIP("FILL.W w%d, r%d", wd, ws);
+ t2 = newTemp(Ity_I32);
+ assign(t2, mkNarrowTo32(ty, getIReg(ws)));
+ assign(t1,
+    binop(Iop_32HLto64, mkexpr(t2), mkexpr(t2)));
+ break;
+
+ case 0x03: /* FILL.D */
+ if (mode64) {
+ DIP("FILL.D w%d, r%d", wd, ws);
+ t2 = newTemp(Ity_I32);
+ assign(t1, getIReg(ws));
+ } else {
+ return -2;
+ }
+
+ break;
+
+ default:
+ return -1;
+ }
+
+ putWReg(wd,
+    binop(Iop_64HLtoV128, mkexpr(t1), mkexpr(t1)));
+ break;
+ }
+
+ case 0xC1: { /* PCNT.df */
+ switch (df) {
+ case 0x00: /* PCNT.B */
+ DIP("PCNT.B w%d, w%d", wd, ws);
+ putWReg(wd,
+    unop(Iop_Cnt8x16, getWReg(ws)));
+ break;
+
+ case 0x01: /* PCNT.H */
+ DIP("PCNT.H w%d, w%d", wd, ws);
+ t1 = newTemp(Ity_V128);
+ t2 = newTemp(Ity_V128);
+ assign(t1, unop(Iop_Cnt8x16, getWReg(ws)));
+ assign(t2,
+    binop(Iop_Add16x8,
+       binop(Iop_AndV128,
+          mkexpr(t1),
+          binop(Iop_64HLtoV128,
+             mkU64(0x00FF00FF00FF00FFULL),
+             mkU64(0x00FF00FF00FF00FFULL))),
+       binop(Iop_AndV128,
+          binop(Iop_ShrN16x8,
+             mkexpr(t1), mkU8(8)),
+          binop(Iop_64HLtoV128,
+             mkU64(0x00FF00FF00FF00FFULL),
+             mkU64(0x00FF00FF00FF00FFULL)))));
+ putWReg(wd, mkexpr(t2));
+ break;
+
+ case 0x02: /* PCNT.W */
+ DIP("PCNT.W w%d, w%d", wd, ws);
+ t1 = newTemp(Ity_V128);
+ t2 = newTemp(Ity_V128);
+ t3 = newTemp(Ity_V128);
+ assign(t1, unop(Iop_Cnt8x16, getWReg(ws)));
+ assign(t2,
+    binop(Iop_Add32x4,
+       binop(Iop_AndV128,
+          mkexpr(t1),
+          binop(Iop_64HLtoV128,
+             mkU64(0x00FF00FF00FF00FFULL),
+             mkU64(0x00FF00FF00FF00FFULL))),
+       binop(Iop_AndV128,
+          binop(Iop_ShrN32x4,
+             mkexpr(t1), mkU8(8)),
+          binop(Iop_64HLtoV128,
+             mkU64(0x00FF00FF00FF00FFULL),
+             mkU64(0x00FF00FF00FF00FFULL)))));
+ assign(t3,
+    binop(Iop_Add32x4,
+       binop(Iop_AndV128,
+          mkexpr(t2),
+          binop(Iop_64HLtoV128,
+             mkU64(0x0000FFFF0000FFFFULL),
+             mkU64(0x0000FFFF0000FFFFULL))),
+       binop(Iop_AndV128,
+          binop(Iop_ShrN32x4,
+             mkexpr(t2), mkU8(16)),
+          binop(Iop_64HLtoV128,
+             mkU64(0x0000FFFF0000FFFFULL),
+             mkU64(0x0000FFFF0000FFFFULL)))));
+ putWReg(wd, mkexpr(t3));
+ break;
+
+ case 0x03: /* PCNT.D */
+ DIP("PCNT.D w%d, w%d", wd, ws);
+ t1 = newTemp(Ity_V128);
+ t2 = newTemp(Ity_V128);
+ t3 = newTemp(Ity_V128);
+ t4 = newTemp(Ity_V128);
+ assign(t1, 
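+ /* per-byte popcount, then pairwise sums that double the lane width on each
+    step: 8 -> 16 -> 32 -> 64 bits */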
unop(Iop_Cnt8x16, getWReg(ws)));
+ assign(t2,
+    binop(Iop_Add64x2,
+       binop(Iop_AndV128,
+          mkexpr(t1),
+          binop(Iop_64HLtoV128,
+             mkU64(0x00FF00FF00FF00FFULL),
+             mkU64(0x00FF00FF00FF00FFULL))),
+       binop(Iop_AndV128,
+          binop(Iop_ShrN64x2,
+             mkexpr(t1), mkU8(8)),
+          binop(Iop_64HLtoV128,
+             mkU64(0x00FF00FF00FF00FFULL),
+             mkU64(0x00FF00FF00FF00FFULL)))));
+ assign(t3,
+    binop(Iop_Add64x2,
+       binop(Iop_AndV128,
+          mkexpr(t2),
+          binop(Iop_64HLtoV128,
+             mkU64(0x0000FFFF0000FFFFULL),
+             mkU64(0x0000FFFF0000FFFFULL))),
+       binop(Iop_AndV128,
+          binop(Iop_ShrN64x2,
+             mkexpr(t2), mkU8(16)),
+          binop(Iop_64HLtoV128,
+             mkU64(0x0000FFFF0000FFFFULL),
+             mkU64(0x0000FFFF0000FFFFULL)))));
+ assign(t4,
+    binop(Iop_Add64x2,
+       binop(Iop_AndV128,
+          mkexpr(t3),
+          binop(Iop_64HLtoV128,
+             mkU64(0x00000000FFFFFFFFULL),
+             mkU64(0x00000000FFFFFFFFULL))),
+       binop(Iop_AndV128,
+          binop(Iop_ShrN64x2,
+             mkexpr(t3), mkU8(32)),
+          binop(Iop_64HLtoV128,
+             mkU64(0x00000000FFFFFFFFULL),
+             mkU64(0x00000000FFFFFFFFULL)))));
+ putWReg(wd, mkexpr(t4));
+ break;
+
+ default:
+ return -1;
+ }
+
+ break;
+ }
+
+ case 0xC2: { /* NLOC.df */
+ switch (df) {
+ case 0x00: /* NLOC.B */
+ DIP("NLOC.B w%d, w%d", wd, ws);
+ putWReg(wd,
+    unop(Iop_Cls8x16, getWReg(ws)));
+ break;
+
+ case 0x01: /* NLOC.H */
+ DIP("NLOC.H w%d, w%d", wd, ws);
+ putWReg(wd,
+    unop(Iop_Cls16x8, getWReg(ws)));
+ break;
+
+ case 0x02: /* NLOC.W */
+ DIP("NLOC.W w%d, w%d", wd, ws);
+ putWReg(wd,
+    unop(Iop_Cls32x4, getWReg(ws)));
+ break;
+
+ case 0x03: /* NLOC.D */
+ DIP("NLOC.D w%d, w%d", wd, ws);
+ t1 = newTemp(Ity_V128);
+ assign(t1, unop(Iop_NotV128, getWReg(ws)));
+ putWReg(wd, unop(Iop_Clz64x2, mkexpr(t1)));
+ break;
+
+ default:
+ return -1;
+ }
+
+ break;
+ }
+
+ case 0xC3: { /* NLZC.df */
+ switch (df) {
+ case 0x00: /* NLZC.B */
+ DIP("NLZC.B w%d, w%d", wd, ws);
+ putWReg(wd,
+    unop(Iop_Clz8x16, getWReg(ws)));
+ break;
+
+ case 0x01: /* NLZC.H */
+ DIP("NLZC.H w%d, w%d", wd, ws);
+ putWReg(wd,
+    unop(Iop_Clz16x8, getWReg(ws)));
+ break;
+
+ case 0x02: /* NLZC.W */
+ DIP("NLZC.W w%d, w%d", wd, ws);
+ putWReg(wd,
+    unop(Iop_Clz32x4, getWReg(ws)));
+ break;
+
+ case 0x03: { /* NLZC.D */
+ DIP("NLZC.D w%d, w%d", wd, ws);
+ putWReg(wd,
+    unop(Iop_Clz64x2, getWReg(ws)));
+ break;
+ }
+
+ default:
+ return -1;
+ }
+
+ break;
+ }
+
+ default:
+ return -1;
+ }
+
+ return 0;
+}
+
+static Int msa_2RF(UInt cins, UChar wd, UChar ws) { /* 2RF */
+ IRTemp t1, t2, t3, t4, t5;
+ UShort operation;
+ UChar df, wt;
+
+ operation = (cins & 0x03FE0000) >> 17;
+ df = (cins & 0x00010000) >> 16;
+ wt = (cins & 0x001F0000) >> 16;
+
+ switch (operation) {
+
+ case 0x190: { /* FCLASS.df */
+ IRTemp t0 = newTemp(Ity_V128);
+ t1 = newTemp(Ity_V128);
+ t2 = newTemp(Ity_V128);
+ t3 = newTemp(Ity_V128);
+ t4 = newTemp(Ity_V128);
+ t5 = newTemp(Ity_V128);
+
+ switch (df) {
+ case 0x00: { /* FCLASS.W */
+ DIP("FCLASS.W w%d, w%d", wd, ws);
+ assign(t0,
+    binop(Iop_CmpEQ32x4,
+       binop(Iop_AndV128,
+          getWReg(ws),
+          binop(Iop_64HLtoV128,
+             mkU64(0x7F8000007F800000ull),
+             mkU64(0x7F8000007F800000ull))),
+       binop(Iop_64HLtoV128,
+          mkU64(0ull), mkU64(0ull))));
+ assign(t1,
+    binop(Iop_CmpEQ32x4,
+       binop(Iop_AndV128,
+          getWReg(ws),
+          binop(Iop_64HLtoV128,
+             mkU64(0x7F8000007F800000ull),
+             mkU64(0x7F8000007F800000ull))),
+       binop(Iop_64HLtoV128,
+          mkU64(0x7F8000007F800000ull),
+          mkU64(0x7F8000007F800000ull))));
+ assign(t2,
+    binop(Iop_SarN32x4,
+       getWReg(ws), mkU8(31)));
+ assign(t3,
+    binop(Iop_CmpEQ32x4,
+       binop(Iop_AndV128,
+          getWReg(ws),
+          binop(Iop_64HLtoV128,
+             mkU64(0x0040000000400000ull),
+             mkU64(0x0040000000400000ull))),
+ 
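+ /* t3 tests the mantissa MSB (0x00400000), which distinguishes quiet from
+    signalling NaNs */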
binop(Iop_64HLtoV128, + mkU64(0x0040000000400000ull), + mkU64(0x0040000000400000ull)))); + assign(t4, + binop(Iop_CmpEQ32x4, + binop(Iop_AndV128, + getWReg(ws), + binop(Iop_64HLtoV128, + mkU64(0x007FFFFF007FFFFFULL), + mkU64(0x007FFFFF007FFFFFULL))), + binop(Iop_64HLtoV128, + mkU64(0ull), mkU64(0ull)))); + assign(t5, + binop(Iop_Shl32x4, + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t1), + binop(Iop_AndV128, + mkexpr(t4), + binop(Iop_64HLtoV128, + mkU64(0x100000001ull), + mkU64(0x100000001ull)))), + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t0), + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t4), + binop(Iop_64HLtoV128, + mkU64(0x800000008ull), + mkU64(0x800000008ull))), + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t4)), + binop(Iop_64HLtoV128, + mkU64(0x400000004ull), + mkU64(0x400000004ull))))), + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t1)), + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t0)), + binop(Iop_64HLtoV128, + mkU64(0x200000002ull), + mkU64(0x200000002ull)))))), + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t2), + binop(Iop_64HLtoV128, + mkU64(0x200000002ull), + mkU64(0x200000002ull))), + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t2)), + binop(Iop_64HLtoV128, + mkU64(0x600000006ull), + mkU64(0x600000006ull)))))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t5), + binop(Iop_AndV128, + binop(Iop_CmpEQ32x4, + mkexpr(t5), + binop(Iop_64HLtoV128, + mkU64(0ull), + mkU64(0ull))), + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t3), + binop(Iop_64HLtoV128, + mkU64(0x100000001ull), + mkU64(0x100000001ull))), + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t3)), + binop(Iop_64HLtoV128, + mkU64(0x200000002ull), + mkU64(0x200000002ull))))))); + break; + } + + case 0x01: { /* FCLASS.D */ + DIP("FCLASS.D w%d, w%d", wd, ws); + assign(t0, + binop(Iop_CmpEQ64x2, + binop(Iop_AndV128, + getWReg(ws), + binop(Iop_64HLtoV128, + mkU64(0x7FF0000000000000ull), + mkU64(0x7FF0000000000000ull))), + binop(Iop_64HLtoV128, + mkU64(0ull), mkU64(0ull)))); + assign(t1, + binop(Iop_CmpEQ64x2, + binop(Iop_AndV128, + getWReg(ws), + binop(Iop_64HLtoV128, + mkU64(0x7FF0000000000000ull), + mkU64(0x7FF0000000000000ull))), + binop(Iop_64HLtoV128, + mkU64(0x7FF0000000000000ull), + mkU64(0x7FF0000000000000ull)))); + assign(t2, + binop(Iop_SarN64x2, + getWReg(ws), mkU8(63))); + assign(t3, + binop(Iop_CmpEQ64x2, + binop(Iop_AndV128, + getWReg(ws), + binop(Iop_64HLtoV128, + mkU64(0x0008000000000000ull), + mkU64(0x0008000000000000ull))), + binop(Iop_64HLtoV128, + mkU64(0x0008000000000000ull), + mkU64(0x0008000000000000ull)))); + assign(t4, + binop(Iop_CmpEQ64x2, + binop(Iop_AndV128, + getWReg(ws), + binop(Iop_64HLtoV128, + mkU64(0x000FFFFFFFFFFFFFULL), + mkU64(0x000FFFFFFFFFFFFFULL))), + binop(Iop_64HLtoV128, + mkU64(0ull), mkU64(0ull)))); + assign(t5, + binop(Iop_Shl64x2, + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t1), + binop(Iop_AndV128, + mkexpr(t4), + binop(Iop_64HLtoV128, + mkU64(1ull), + mkU64(1ull)))), + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t0), + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t4), + binop(Iop_64HLtoV128, + mkU64(8ull), + mkU64(8ull))), + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t4)), + binop(Iop_64HLtoV128, + mkU64(4ull), + mkU64(4ull))))), + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t1)), + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t0)), + binop(Iop_64HLtoV128, + mkU64(2ull), + mkU64(2ull)))))), + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t2), + binop(Iop_64HLtoV128, + mkU64(2ull), + mkU64(2ull))), + 
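+ /* per-lane shift amount: 2 for negative inputs, 6 for positive ones,
+    steering the class bits into the proper group of the FCLASS result */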
binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t2)), + binop(Iop_64HLtoV128, + mkU64(6ull), + mkU64(6ull)))))); + putWReg(wd, + binop(Iop_OrV128, + mkexpr(t5), + binop(Iop_AndV128, + binop(Iop_CmpEQ64x2, + mkexpr(t5), + binop(Iop_64HLtoV128, + mkU64(0ull), + mkU64(0ull))), + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t3), + binop(Iop_64HLtoV128, + mkU64(1ull), + mkU64(1ull))), + binop(Iop_AndV128, + unop(Iop_NotV128, + mkexpr(t3)), + binop(Iop_64HLtoV128, + mkU64(2ull), + mkU64(2ull))))))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x191: { /* FTRUNC_S.df */ + switch (df) { + case 0x00: { /* FTRUNC_S.W */ + DIP("FTRUNC_S.W w%d, w%d", wd, ws); + calculateMSACSR(ws, wd, FTRUNCSW, 1); + putWReg(wd, unop(Iop_FtoI32Sx4_RZ, getWReg(ws))); + break; + } + + case 0x01: { /* FTRUNC_S.D */ + DIP("FTRUNC_S.D w%d, w%d", wd, ws); + calculateMSACSR(ws, wd, FTRUNCSD, 1); + t1 = newTemp(Ity_I64); + t2 = newTemp(Ity_I64); + t3 = newTemp(Ity_V128); + assign(t3, + binop(Iop_AndV128, + unop(Iop_NotV128, + binop(Iop_CmpUN64Fx2, + getWReg(ws), + getWReg(ws))), + binop(Iop_Max64Fx2, + getWReg(ws), + binop(Iop_64HLtoV128, + mkU64(0xC3E0000000000000), + mkU64(0xC3E0000000000000))))); + assign(t1, + unop(Iop_ReinterpF64asI64, + binop(Iop_RoundF64toInt, + mkU32(0x3), + unop(Iop_ReinterpI64asF64, + unop(Iop_V128to64, + mkexpr(t3)))))); + assign(t2, + unop(Iop_ReinterpF64asI64, + binop(Iop_RoundF64toInt, + mkU32(0x3), + unop(Iop_ReinterpI64asF64, + unop(Iop_V128HIto64, + mkexpr(t3)))))); + putWReg(wd, + binop(Iop_64HLtoV128, + mkexpr(t2), mkexpr(t1))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x192: { /* FTRUNC_U.df */ + switch (df) { + case 0x00: { /* FTRUNC_U.W */ + DIP("FTRUNC_U.W w%d, w%d", wd, ws); + calculateMSACSR(ws, wd, FTRUNCUW, 1); + putWReg(wd, unop(Iop_FtoI32Ux4_RZ, getWReg(ws))); + break; + } + + case 0x01: { /* FTRUNC_U.D */ + DIP("FTRUNC_U.D w%d, w%d", wd, ws); + calculateMSACSR(ws, wd, FTRUNCUD, 1); + t1 = newTemp(Ity_I64); + t2 = newTemp(Ity_I64); + assign(t1, + binop(Iop_F64toI64U, + mkU32(0x3), + unop(Iop_ReinterpI64asF64, + unop(Iop_V128to64, + getWReg(ws))))); + assign(t2, + binop(Iop_F64toI64U, + mkU32(0x3), + unop(Iop_ReinterpI64asF64, + unop(Iop_V128HIto64, + getWReg(ws))))); + putWReg(wd, + binop(Iop_64HLtoV128, + mkexpr(t2), mkexpr(t1))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x193: { /* FSQRT.df */ + switch (df) { + case 0x00: { /* FSQRT.W */ + DIP("FSQRT.W w%d, w%d", wd, ws); + IRExpr *rm = get_IR_roundingmode_MSA(); + calculateMSACSR(ws, wd, FSQRTW, 1); + putWReg(wd, binop(Iop_Sqrt32Fx4, rm, getWReg(ws))); + break; + } + + case 0x01: { /* FSQRT.D */ + DIP("FSQRT.D w%d, w%d", wd, ws); + IRExpr *rm = get_IR_roundingmode_MSA(); + calculateMSACSR(ws, wd, FSQRTD, 1); + putWReg(wd, binop(Iop_Sqrt64Fx2, rm, getWReg(ws))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x194: { /* FRSQRT.df */ + switch (df) { + case 0x00: { /* FRSQRT.W */ + DIP("FRSQRT.W w%d, w%d", wd, ws); + calculateMSACSR(ws, wd, FRSQRTW, 1); + putWReg(wd, unop(Iop_RSqrtEst32Fx4, getWReg(ws))); + break; + } + + case 0x01: { /* FRSQRT.D */ + DIP("FRSQRT.D w%d, w%d", wd, ws); + calculateMSACSR(ws, wd, FRSQRTD, 1); + putWReg(wd, unop(Iop_RSqrtEst64Fx2, getWReg(ws))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x195: { /* FRCP.df */ + switch (df) { /* FRCP.W */ + case 0x00: { + DIP("FRCP.W w%d, w%d", wd, ws); + calculateMSACSR(ws, wd, FRCPW, 1); + putWReg(wd, unop(Iop_RecipEst32Fx4, getWReg(ws))); + 
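+ /* FRCP.df maps onto the reciprocal-estimate IOP; the exception flags are
+    computed separately by the MSACSR helper call above */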
break; + } + + case 0x01: { /* FRCP.D */ + DIP("FRCP.D w%d, w%d", wd, ws); + calculateMSACSR(ws, wd, FRCPD, 1); + putWReg(wd, unop(Iop_RecipEst64Fx2, getWReg(ws))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x196: { /* FRINT.df */ + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + IRExpr *rm = get_IR_roundingmode_MSA(); + assign(t1, getWReg(ws)); + + switch (df) { + case 0x00: { /* FRINT.W */ + DIP("FRINT.W w%d, w%d", wd, ws); + calculateMSACSR(ws, wt, FRINTW, 1); + assign(t2, + binop(Iop_OrV128, + binop(Iop_CmpLT32Fx4, + mkexpr(t1), + binop(Iop_64HLtoV128, + mkU64(0xCF000000CF000000ull), + mkU64(0xCF000000CF000000ull))), + binop(Iop_CmpLT32Fx4, + binop(Iop_64HLtoV128, + mkU64(0x4F0000004F000000ull), + mkU64(0x4F0000004F000000ull)), + mkexpr(t1)))); + assign(t3, + binop(Iop_CmpEQ32x4, + binop(Iop_AndV128, + mkexpr(t1), + binop(Iop_64HLtoV128, + mkU64(0x0040000000400000ull), + mkU64(0x0040000000400000ull))), + binop(Iop_64HLtoV128, + mkU64(0x0040000000400000ull), + mkU64(0x0040000000400000ull)))); + assign(t4, + binop(Iop_CmpUN32Fx4, + mkexpr(t1), mkexpr(t1))); + IRTemp tmp[4]; + Int i; + + for (i = 0; i < 4; i++) { + tmp[i] = newTemp(Ity_I32); + assign(tmp[i], + unop(Iop_ReinterpF32asI32, + binop(Iop_I32StoF32, rm, + unop(Iop_ReinterpF32asI32, + binop(Iop_RoundF32toInt, rm, + unop(Iop_ReinterpI32asF32, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(i)))))))); + } + + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_OrV128, + binop(Iop_AndV128, + binop(Iop_OrV128, + mkexpr(t2), + binop(Iop_AndV128, + mkexpr(t4), + unop(Iop_NotV128, + mkexpr(t3)))), + mkexpr(t1)), + binop(Iop_AndV128, + binop(Iop_AndV128, + mkexpr(t4), + mkexpr(t3)), + binop(Iop_64HLtoV128, + mkU64(0x7FBFFFFF7FBFFFFF), + mkU64(0x7FBFFFFF7FBFFFFF)))), + binop(Iop_AndV128, + unop(Iop_NotV128, + binop(Iop_OrV128, + mkexpr(t2), + mkexpr(t4))), + binop(Iop_OrV128, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + mkexpr(tmp[3]), + mkexpr(tmp[2])), + binop(Iop_32HLto64, + mkexpr(tmp[1]), + mkexpr(tmp[0]))), + binop(Iop_AndV128, + mkexpr(t1), + binop(Iop_64HLtoV128, + mkU64(0x8000000080000000ull), + mkU64(0x8000000080000000ull))) + )))); + break; + } + + case 0x01: { /* FRINT.D */ + DIP("FRINT.D w%d, w%d", wd, ws); + calculateMSACSR(ws, wt, FRINTD, 1); + assign(t2, + binop(Iop_OrV128, + binop(Iop_CmpLT64Fx2, + mkexpr(t1), + binop(Iop_64HLtoV128, + mkU64(0xC3E0000000000000ull), + mkU64(0xC3E0000000000000ull))), + binop(Iop_CmpLT64Fx2, + binop(Iop_64HLtoV128, + mkU64(0x43E0000000000000ull), + mkU64(0x43E0000000000000ull)), + mkexpr(t1)))); + assign(t3, + binop(Iop_CmpEQ64x2, + binop(Iop_AndV128, + getWReg(ws), + binop(Iop_64HLtoV128, + mkU64(0x0008000000000000ull), + mkU64(0x0008000000000000ull))), + binop(Iop_64HLtoV128, + mkU64(0x0008000000000000ull), + mkU64(0x0008000000000000ull)))); + assign(t4, + binop(Iop_CmpUN64Fx2, + mkexpr(t1), mkexpr(t1))); + IRTemp tmp[2]; + Int i; + + for (i = 0; i < 2; i++) { + tmp[i] = newTemp(Ity_I64); + assign(tmp[i], + unop(Iop_ReinterpF64asI64, + binop(Iop_I64StoF64, rm, + unop(Iop_ReinterpF64asI64, + binop(Iop_RoundF64toInt, rm, + unop(Iop_ReinterpI64asF64, + binop(Iop_GetElem64x2, + mkexpr(t1), + mkU8(i)))))))); + } + + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_OrV128, + binop(Iop_AndV128, + binop(Iop_OrV128, + mkexpr(t2), + binop(Iop_AndV128, + mkexpr(t4), + unop(Iop_NotV128, + mkexpr(t3)))), + mkexpr(t1)), + binop(Iop_AndV128, + binop(Iop_AndV128, + mkexpr(t4), + mkexpr(t3)), + binop(Iop_64HLtoV128, + 
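+ /* NaN inputs with the mantissa MSB set are replaced by this quietened
+    NaN pattern */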
mkU64(0x7FF7FFFFFFFFFFFF), + mkU64(0x7FF7FFFFFFFFFFFF)))), + binop(Iop_AndV128, + unop(Iop_NotV128, + binop(Iop_OrV128, + mkexpr(t2), + mkexpr(t4))), + binop(Iop_OrV128, + binop(Iop_64HLtoV128, + mkexpr(tmp[1]), + mkexpr(tmp[0])), + binop(Iop_AndV128, + mkexpr(t1), + binop(Iop_64HLtoV128, + mkU64(0x8000000000000000ull), + mkU64(0x8000000000000000ull)) + ))))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x197: { /* FLOG2.df */ + + switch (df) { + case 0x00: { /* FLOG2.W */ + DIP("FLOG2.W w%d, w%d", wd, ws); + calculateMSACSR(ws, wt, FLOG2W, 1); + putWReg(wd, unop(Iop_Log2_32Fx4, getWReg(ws))); + break; + } + + case 0x01: { /* FLOG2.D */ + DIP("FLOG2.D w%d, w%d", wd, ws); + calculateMSACSR(ws, wt, FLOG2D, 1); + putWReg(wd, unop(Iop_Log2_64Fx2, getWReg(ws))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x198: { /* FEXUPL.df */ + switch (df) { + case 0x00: { /* FEXUPL.W */ + DIP("FEXUPL.W w%d, w%d", wd, ws); + calculateMSACSR(ws, wt, FEXUPLW, 1); + putWReg(wd, + unop(Iop_F16toF32x4, + unop(Iop_V128HIto64, + getWReg(ws)))); + break; + } + + case 0x01: { /* FEXUPL.D */ + DIP("FEXUPL.D w%d, w%d", wd, ws); + calculateMSACSR(ws, wt, FEXUPLD, 1); + t1 = newTemp(Ity_I64); + t2 = newTemp(Ity_I64); + assign(t1, + unop(Iop_ReinterpF64asI64, + unop(Iop_F32toF64, + unop(Iop_ReinterpI32asF32, + unop(Iop_64to32, + unop(Iop_V128HIto64, + getWReg(ws))))))); + assign(t2, + unop(Iop_ReinterpF64asI64, + unop(Iop_F32toF64, + unop(Iop_ReinterpI32asF32, + unop(Iop_64HIto32, + unop(Iop_V128HIto64, + getWReg(ws))))))); + putWReg(wd, + binop(Iop_64HLtoV128, + mkexpr(t2), mkexpr(t1))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x199: { /* FEXUPR.df */ + switch (df) { + case 0x00: { /* FEXUPR.W */ + DIP("FEXUPR.W w%d, w%d", wd, ws); + calculateMSACSR(ws, wt, FEXUPRW, 1); + putWReg(wd, + unop(Iop_F16toF32x4, + unop(Iop_V128to64, + getWReg(ws)))); + break; + } + + case 0x01: { /* FEXUPR.D */ + DIP("FEXUPR.D w%d, w%d", wd, ws); + calculateMSACSR(ws, wt, FEXUPRD, 1); + t1 = newTemp(Ity_I64); + t2 = newTemp(Ity_I64); + assign(t1, + unop(Iop_ReinterpF64asI64, + unop(Iop_F32toF64, + unop(Iop_ReinterpI32asF32, + unop(Iop_64to32, + unop(Iop_V128to64, + getWReg(ws))))))); + assign(t2, + unop(Iop_ReinterpF64asI64, + unop(Iop_F32toF64, + unop(Iop_ReinterpI32asF32, + unop(Iop_64HIto32, + unop(Iop_V128to64, + getWReg(ws))))))); + putWReg(wd, + binop(Iop_64HLtoV128, + mkexpr(t2), mkexpr(t1))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x19A: { /* FFQL.df */ + switch (df) { + case 0x00: { /* FFQL.W */ + DIP("FFQL.W w%d, w%d", wd, ws); + calculateMSACSR(ws, wt, FFQLW, 1); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_I64); + t3 = newTemp(Ity_I64); + IRExpr *rm = get_IR_roundingmode_MSA(); + assign(t1, + binop(Iop_SarN32x4, + binop(Iop_InterleaveHI16x8, + getWReg(ws), + getWReg(ws)), + mkU8(16))); + assign(t2, + binop(Iop_32HLto64, + unop(Iop_ReinterpF32asI32, + binop(Iop_I32StoF32, rm, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(1)))), + unop(Iop_ReinterpF32asI32, + binop(Iop_I32StoF32, rm, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(0)))))); + assign(t3, + binop(Iop_32HLto64, + unop(Iop_ReinterpF32asI32, + binop(Iop_I32StoF32, rm, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(3)))), + unop(Iop_ReinterpF32asI32, + binop(Iop_I32StoF32, rm, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(2)))))); + putWReg(wd, + triop(Iop_Div32Fx4, rm, + binop(Iop_64HLtoV128, + mkexpr(t3), mkexpr(t2)), + binop(Iop_64HLtoV128, + mkU64(0x4700000047000000), + 
mkU64(0x4700000047000000)))); + break; + } + + case 0x01: { /* FFQL.D */ + DIP("FFQL.D w%d, w%d", wd, ws); + calculateMSACSR(ws, wt, FFQLD, 1); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_I64); + t3 = newTemp(Ity_I64); + IRExpr *rm = get_IR_roundingmode_MSA(); + assign(t1, + binop(Iop_SarN64x2, + binop(Iop_InterleaveHI32x4, + getWReg(ws), + getWReg(ws)), + mkU8(32))); + assign(t2, + unop(Iop_ReinterpF64asI64, + binop(Iop_I64StoF64, rm, + unop(Iop_V128to64, + mkexpr(t1))))); + assign(t3, + unop(Iop_ReinterpF64asI64, + binop(Iop_I64StoF64, rm, + unop(Iop_V128HIto64, + mkexpr(t1))))); + putWReg(wd, + triop(Iop_Div64Fx2, rm, + binop(Iop_64HLtoV128, + mkexpr(t3), mkexpr(t2)), + binop(Iop_64HLtoV128, + mkU64(0x41E0000000000000), + mkU64(0x41E0000000000000)))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x19B: { /* FFQR.df */ + switch (df) { + case 0x00: { /* FFQR.W */ + DIP("FFQR.W w%d, w%d", wd, ws); + calculateMSACSR(ws, wt, FFQRW, 1); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_I64); + t3 = newTemp(Ity_I64); + IRExpr *rm = get_IR_roundingmode_MSA(); + assign(t1, + binop(Iop_SarN32x4, + binop(Iop_InterleaveLO16x8, + getWReg(ws), + getWReg(ws)), + mkU8(16))); + assign(t2, + binop(Iop_32HLto64, + unop(Iop_ReinterpF32asI32, + binop(Iop_I32StoF32, rm, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(1)))), + unop(Iop_ReinterpF32asI32, + binop(Iop_I32StoF32, rm, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(0)))))); + assign(t3, + binop(Iop_32HLto64, + unop(Iop_ReinterpF32asI32, + binop(Iop_I32StoF32, rm, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(3)))), + unop(Iop_ReinterpF32asI32, + binop(Iop_I32StoF32, rm, + binop(Iop_GetElem32x4, + mkexpr(t1), + mkU8(2)))))); + putWReg(wd, + triop(Iop_Div32Fx4, rm, + binop(Iop_64HLtoV128, + mkexpr(t3), mkexpr(t2)), + binop(Iop_64HLtoV128, + mkU64(0x4700000047000000), + mkU64(0x4700000047000000)))); + break; + } + + case 0x01: { /* FFQR.D */ + DIP("FFQR.D w%d, w%d", wd, ws); + calculateMSACSR(ws, wt, FFQRD, 1); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_I64); + t3 = newTemp(Ity_I64); + IRExpr *rm = get_IR_roundingmode_MSA(); + assign(t1, + binop(Iop_SarN64x2, + binop(Iop_InterleaveLO32x4, + getWReg(ws), + getWReg(ws)), + mkU8(32))); + assign(t2, + unop(Iop_ReinterpF64asI64, + binop(Iop_I64StoF64, rm, + unop(Iop_V128to64, + mkexpr(t1))))); + assign(t3, + unop(Iop_ReinterpF64asI64, + binop(Iop_I64StoF64, rm, + unop(Iop_V128HIto64, + mkexpr(t1))))); + putWReg(wd, + triop(Iop_Div64Fx2, rm, + binop(Iop_64HLtoV128, + mkexpr(t3), mkexpr(t2)), + binop(Iop_64HLtoV128, + mkU64(0x41E0000000000000), + mkU64(0x41E0000000000000)))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x19C: { /* FTINT_S.df */ + switch (df) { /* FTINT_S.W */ + case 0x00: { + DIP("FTINT_S.W w%d, w%d", wd, ws); + calculateMSACSR(ws, wd, FTINT_SW, 1); + t1 = newTemp(Ity_I64); + t2 = newTemp(Ity_I64); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_I32); + assign(t3, + binop(Iop_AndV128, + unop(Iop_NotV128, + binop(Iop_CmpUN32Fx4, + getWReg(ws), + getWReg(ws))), + binop(Iop_Max32Fx4, + getWReg(ws), + binop(Iop_64HLtoV128, + mkU64(0xCF000000CF000000), + mkU64(0xCF000000CF000000))))); + IRExpr *rm = get_IR_roundingmode_MSA(); + assign(t1, + binop(Iop_32HLto64, + unop(Iop_ReinterpF32asI32, + binop(Iop_RoundF32toInt, rm, + unop(Iop_ReinterpI32asF32, + binop(Iop_GetElem32x4, + mkexpr(t3), + mkU8(1))))), + unop(Iop_ReinterpF32asI32, + binop(Iop_RoundF32toInt, rm, + unop(Iop_ReinterpI32asF32, + binop(Iop_GetElem32x4, + mkexpr(t3), + mkU8(0))))))); + assign(t2, + 
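+ /* upper two lanes, rounded to integer under the current MSA rounding
+    mode */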
binop(Iop_32HLto64, + unop(Iop_ReinterpF32asI32, + binop(Iop_RoundF32toInt, rm, + unop(Iop_ReinterpI32asF32, + binop(Iop_GetElem32x4, + mkexpr(t3), + mkU8(3))))), + unop(Iop_ReinterpF32asI32, + binop(Iop_RoundF32toInt, rm, + unop(Iop_ReinterpI32asF32, + binop(Iop_GetElem32x4, + mkexpr(t3), + mkU8(2))))))); + putWReg(wd, + binop(Iop_64HLtoV128, + mkexpr(t2), mkexpr(t1))); + break; + } + + case 0x01: { /* FTINT_S.D */ + DIP("FTINT_S.D w%d, w%d", wd, ws); + calculateMSACSR(ws, wd, FTINT_SD, 1); + t1 = newTemp(Ity_I64); + t2 = newTemp(Ity_I64); + t3 = newTemp(Ity_V128); + assign(t3, + binop(Iop_AndV128, + unop(Iop_NotV128, + binop(Iop_CmpUN64Fx2, + getWReg(ws), + getWReg(ws))), + binop(Iop_Max64Fx2, + getWReg(ws), + binop(Iop_64HLtoV128, + mkU64(0xC3E0000000000000), + mkU64(0xC3E0000000000000))))); + IRExpr *rm = get_IR_roundingmode_MSA(); + assign(t1, + unop(Iop_ReinterpF64asI64, + binop(Iop_RoundF64toInt, rm, + unop(Iop_ReinterpI64asF64, + unop(Iop_V128to64, + mkexpr(t3)))))); + assign(t2, + unop(Iop_ReinterpF64asI64, + binop(Iop_RoundF64toInt, rm, + unop(Iop_ReinterpI64asF64, + unop(Iop_V128HIto64, + mkexpr(t3)))))); + putWReg(wd, + binop(Iop_64HLtoV128, + mkexpr(t2), mkexpr(t1))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x19D: {/* FTINT_U.df */ + switch (df) { /* FTINT_U.W */ + case 0x00: { + DIP("FTINT_U.W w%d, w%d", wd, ws); + calculateMSACSR(ws, wd, FTINT_UW, 1); + t1 = newTemp(Ity_I64); + t2 = newTemp(Ity_I64); + t3 = newTemp(Ity_V128); + t4 = newTemp(Ity_V128); + IRExpr *rm = get_IR_roundingmode_MSA(); + assign(t1, + binop(Iop_32HLto64, + unop(Iop_ReinterpF32asI32, + binop(Iop_RoundF32toInt, rm, + unop(Iop_ReinterpI32asF32, + binop(Iop_GetElem32x4, + getWReg(ws), + mkU8(1))))), + unop(Iop_ReinterpF32asI32, + binop(Iop_RoundF32toInt, rm, + unop(Iop_ReinterpI32asF32, + binop(Iop_GetElem32x4, + getWReg(ws), + mkU8(0))))))); + assign(t2, + binop(Iop_32HLto64, + unop(Iop_ReinterpF32asI32, + binop(Iop_RoundF32toInt, rm, + unop(Iop_ReinterpI32asF32, + binop(Iop_GetElem32x4, + getWReg(ws), + mkU8(3))))), + unop(Iop_ReinterpF32asI32, + binop(Iop_RoundF32toInt, rm, + unop(Iop_ReinterpI32asF32, + binop(Iop_GetElem32x4, + getWReg(ws), + mkU8(2))))))); + assign(t3, + unop(Iop_NotV128, + binop(Iop_SarN32x4, + getWReg(ws), + mkU8(31)))); + assign(t4, + binop(Iop_CmpLT32Fx4, + getWReg(ws), + binop(Iop_64HLtoV128, + mkU64(0x4EFFFFFF4EFFFFFF), + mkU64(0x4EFFFFFF4EFFFFFF)))); + putWReg(wd, + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(t4), + binop(Iop_AndV128, + binop(Iop_64HLtoV128, + mkexpr(t2), + mkexpr(t1)), + mkexpr(t3))), + binop(Iop_AndV128, + unop(Iop_NotV128, mkexpr(t4)), + unop(Iop_FtoI32Ux4_RZ, + getWReg(ws))))); + break; + } + + case 0x01: { /* FTINT_U.D */ + DIP("FTINT_U.D w%d, w%d", wd, ws); + calculateMSACSR(ws, wd, FTINT_UD, 1); + t1 = newTemp(Ity_I64); + t2 = newTemp(Ity_I64); + IRExpr *rm = get_IR_roundingmode_MSA(); + assign(t1, + binop(Iop_F64toI64U, rm, + unop(Iop_ReinterpI64asF64, + unop(Iop_V128to64, + getWReg(ws))))); + assign(t2, + binop(Iop_F64toI64U, rm, + unop(Iop_ReinterpI64asF64, + unop(Iop_V128HIto64, + getWReg(ws))))); + putWReg(wd, + binop(Iop_64HLtoV128, + mkexpr(t2), mkexpr(t1))); + break; + } + + default: + return -1; + } + + break; + } + + case 0x19E: { /* FFINT_S.df */ + t1 = newTemp(Ity_V128); + assign(t1, getWReg(ws)); + IRExpr *rm = get_IR_roundingmode_MSA(); + + switch (df) { + case 0x00: { /* FFINT_S.W */ + DIP("FFINT_S.W w%d, w%d", wd, ws); + calculateMSACSR(ws, wt, FFINTSW, 1); + IRTemp tmp[4]; + Int i; + + for (i = 0; i < 4; 
i++) { + tmp[i] = newTemp(Ity_F32); + assign(tmp[i], + binop(Iop_I32StoF32, rm, + binop(Iop_GetElem32x4, + mkexpr(t1), mkU8(i)))); + } + + putWReg(wd, + binop(Iop_64HLtoV128, + binop(Iop_32HLto64, + unop(Iop_ReinterpF32asI32, + mkexpr(tmp[3])), + unop(Iop_ReinterpF32asI32, + mkexpr(tmp[2]))), + binop(Iop_32HLto64, + unop(Iop_ReinterpF32asI32, + mkexpr(tmp[1])), + unop(Iop_ReinterpF32asI32, + mkexpr(tmp[0]))))); + break; + } - putIReg(rt, binop(Iop_Or32, - mkexpr(t1), - binop(Iop_And32, - getIReg(rs), mkexpr(t2)))); + case 0x01: { /* FFINT_S.D */ + DIP("FFINT_S.D w%d, w%d", wd, ws); + calculateMSACSR(ws, wt, FFINTSD, 1); + IRTemp tmp[2]; + Int i; + + for (i = 0; i < 2; i++) { + tmp[i] = newTemp(Ity_F64); + assign(tmp[i], + binop(Iop_I64StoF64, rm, + binop(Iop_GetElem64x2, + mkexpr(t1), mkU8(i)))); } + + putWReg(wd, + binop(Iop_64HLtoV128, + unop(Iop_ReinterpF64asI64, + mkexpr(tmp[1])), + unop(Iop_ReinterpF64asI64, + mkexpr(tmp[0])))); break; } - case 0x1: { /* PREPEND */ - DIP("prepend r%u, r%u, %u", rt, rs, rd); - vassert(!mode64); - t1 = newTemp(Ity_I32); - t2 = newTemp(Ity_I32); - t3 = newTemp(Ity_I32); - if (0 != rd) { - assign(t1, binop(Iop_Shr32, getIReg(rt), mkU8(rd))); + default: + return -1; + } - if (31 == rd) { - putIReg(rt, binop(Iop_Or32, - mkexpr(t1), - binop(Iop_Shl32, - binop(Iop_And32, - getIReg(rs), - mkU32(0x7fffffff)), - mkU8(1)))); - } else if (1 == rd) { - putIReg(rt, binop(Iop_Or32, - mkexpr(t1), - binop(Iop_Shl32, - binop(Iop_And32, - getIReg(rs), - mkU32(0x1)), - mkU8(31)))); - } else { - assign(t2, binop(Iop_Add32, mkU32(rd), mkU32(0x1))); + break; + } - assign(t3, unop(Iop_Not32, - binop(Iop_Shl32, - mkU32(0xffffffff), - unop(Iop_32to8, mkexpr(t2))))); + case 0x19F: { /* FFINT_U.df */ + IRExpr *rm = get_IR_roundingmode_MSA(); - putIReg(rt, binop(Iop_Or32, - mkexpr(t1), - binop(Iop_Shl32, - binop(Iop_And32, - getIReg(rs), - mkexpr(t3)), - mkU8(32-rd)))); - } - } + switch (df) { + case 0x00: { /* FFINT_U.W */ + DIP("FFINT_U.W w%d, w%d", wd, ws); + calculateMSACSR(ws, wt, FFINT_UW, 1); + putWReg(wd, unop(Iop_I32UtoFx4, getWReg(ws))); break; } - case 0x10: { /* BALIGN */ - DIP("balign r%u, r%u, %u", rt, rs, rd); - vassert(!mode64); - t1 = newTemp(Ity_I32); - t2 = newTemp(Ity_I32); - t3 = newTemp(Ity_I32); - if ((2 != rd) && (0 != rd)) { - assign(t1, binop(Iop_Shl32, - binop(Iop_And32, - mkU32(rd), mkU32(0x3)), - mkU8(0x3))); - assign(t2, binop(Iop_Shl32, - getIReg(rt), - unop(Iop_32to8, mkexpr(t1)))); - assign(t3, binop(Iop_Shr32, - getIReg(rs), - unop(Iop_32to8, - binop(Iop_Shl32, - binop(Iop_Sub32, - mkU32(0x4), - binop(Iop_And32, - mkU32(rd), - mkU32(0x3))), - mkU8(0x3))))); - putIReg(rt, binop(Iop_Or32, mkexpr(t2), mkexpr(t3))); - } + case 0x01: { /* FFINT_U.D */ + DIP("FFINT_U.D w%d, w%d", + wd, ws); + calculateMSACSR(ws, wt, + FFINT_UD, 1); + t1 = newTemp(Ity_I64); + t2 = newTemp(Ity_I64); + assign(t1, + unop(Iop_ReinterpF64asI64, + binop(Iop_I64UtoF64, rm, + unop(Iop_V128to64, + getWReg(ws))))); + assign(t2, + unop(Iop_ReinterpF64asI64, + binop(Iop_I64UtoF64, rm, + unop(Iop_V128HIto64, + getWReg(ws))))); + putWReg(wd, + binop(Iop_64HLtoV128, + mkexpr(t2), mkexpr(t1))); break; } - default: - return -1; - } - break; /* end of APPEND */ + + default: + return -1; } - default: - return -1; + + break; } - break; - } + default: - return -1; + return -1; + } + + return 0; +} + +static Int msa_MI10_load(UInt cins, UChar wd, UChar ws) { /* MI10 (0x20) */ + IRTemp t1; + UShort i10; + UChar df; + + i10 = (cins & 0x03FF0000) >> 16; + df = cins & 0x00000003; + + switch (df) 
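+ /* the 10-bit offset is scaled by the element size; on big-endian hosts the
+    lanes are byte-swapped around the 128-bit memory access */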
{
+ case 0x00: { /* LD.B */
+ DIP("LD.B w%d, %d(r%d)", wd, i10, ws);
+ LOAD_STORE_PATTERN_MSA(i10);
+ putWReg(wd, load(Ity_V128, mkexpr(t1)));
+ break;
+ }
+
+ case 0x01: { /* LD.H */
+ DIP("LD.H w%d, %d(r%d)", wd, i10, ws);
+ LOAD_STORE_PATTERN_MSA(i10 << 1);
+#if defined (_MIPSEL)
+ putWReg(wd, load(Ity_V128, mkexpr(t1)));
+#elif defined (_MIPSEB)
+ putWReg(wd,
+    unop(Iop_Reverse8sIn16_x8,
+       load(Ity_V128, mkexpr(t1))));
+#endif
+ break;
+ }
+
+ case 0x02: { /* LD.W */
+ DIP("LD.W w%d, %d(r%d)", wd, i10, ws);
+ LOAD_STORE_PATTERN_MSA(i10 << 2);
+#if defined (_MIPSEL)
+ putWReg(wd, load(Ity_V128, mkexpr(t1)));
+#elif defined (_MIPSEB)
+ putWReg(wd,
+    unop(Iop_Reverse8sIn32_x4,
+       load(Ity_V128, mkexpr(t1))));
+#endif
+ break;
+ }
+
+ case 0x03: { /* LD.D */
+ DIP("LD.D w%d, %d(r%d)", wd, i10, ws);
+ LOAD_STORE_PATTERN_MSA(i10 << 3);
+#if defined (_MIPSEL)
+ putWReg(wd, load(Ity_V128, mkexpr(t1)));
+#elif defined (_MIPSEB)
+ putWReg(wd,
+    unop(Iop_Reverse8sIn64_x2,
+       load(Ity_V128, mkexpr(t1))));
+#endif
+ break;
+ }
+
+ default:
+ return -1;
+ }
+
+ return 0;
+}
+
+static Int msa_MI10_store(UInt cins, UChar wd, UChar ws) { /* MI10 (0x24) */
+ IRTemp t1;
+ UShort i10;
+ UChar df;
+
+ df = cins & 0x00000003;
+ i10 = (cins & 0x03FF0000) >> 16;
+
+ switch (df) {
+ case 0x00: { /* ST.B */
+ DIP("ST.B w%d, %d(r%d)", wd, i10, ws);
+ LOAD_STORE_PATTERN_MSA(i10);
+ store(mkexpr(t1), getWReg(wd));
+ break;
+ }
+
+ case 0x01: { /* ST.H */
+ DIP("ST.H w%d, %d(r%d)", wd, i10, ws);
+ LOAD_STORE_PATTERN_MSA(i10 << 1);
+#if defined (_MIPSEL)
+ store(mkexpr(t1), getWReg(wd));
+#elif defined (_MIPSEB)
+ store(mkexpr(t1),
+    unop(Iop_Reverse8sIn16_x8, getWReg(wd)));
+#endif
+ break;
+ }
+
+ case 0x02: { /* ST.W */
+ DIP("ST.W w%d, %d(r%d)", wd, i10, ws);
+ LOAD_STORE_PATTERN_MSA(i10 << 2);
+#if defined (_MIPSEL)
+ store(mkexpr(t1), getWReg(wd));
+#elif defined (_MIPSEB)
+ store(mkexpr(t1),
+    unop(Iop_Reverse8sIn32_x4, getWReg(wd)));
+#endif
+ break;
+ }
+
+ case 0x03: { /* ST.D */
+ DIP("ST.D w%d, %d(r%d)", wd, i10, ws);
+ LOAD_STORE_PATTERN_MSA(i10 << 3);
+#if defined (_MIPSEL)
+ store(mkexpr(t1), getWReg(wd));
+#elif defined (_MIPSEB)
+ store(mkexpr(t1),
+    unop(Iop_Reverse8sIn64_x2, getWReg(wd)));
+#endif
+ break;
+ }
+
+ default:
+ return -1;
+ }
+
+ return 0;
+}
+
+/*------------------------------------------------------------*/
+/*--- Disassemble a single MIPS MSA (SIMD) instruction     ---*/
+/*--- Return values:                                       ---*/
+/*---    0: Success                                        ---*/
+/*---   -1: Decode failure (unknown instruction)           ---*/
+/*---   -2: Illegal instruction                            ---*/
+/*------------------------------------------------------------*/
+static Int disMSAInstr_MIPS_WRK ( UInt cins ) {
+ UChar minor_opcode, wd, ws;
+
+ vassert(has_msa);
+ vassert((cins & 0xFC000000) == 0x78000000);
+
+ minor_opcode = (cins & 0x20) > 0 ? 
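+ /* for the MI10 load/store groups (bit 5 set) the two low opcode bits encode
+    df, so they are masked out of the minor opcode */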
(cins & 0x3C) : (cins & 0x3F); + wd = (cins & 0x000007C0) >> 6; + ws = (cins & 0x0000F800) >> 11; + + switch (minor_opcode) { + case 0x0: + return msa_I8_logical(cins, wd, ws); + + case 0x01: + return msa_I8_branch(cins, wd, ws); + + case 0x02: + return msa_I8_shift(cins, wd, ws); + + case 0x06: + return msa_I5_06(cins, wd, ws); + + case 0x07: + return msa_I5_07(cins, wd, ws); + + case 0x09: + return msa_BIT_09(cins, wd, ws); + + case 0x0A: + return msa_BIT_0A(cins, wd, ws); + + case 0x0D: + return msa_3R_0D(cins, wd, ws); + + case 0x0E: + return msa_3R_0E(cins, wd, ws); + + case 0x0F: + return msa_3R_0F(cins, wd, ws); + + case 0x10: + return msa_3R_10(cins, wd, ws); + + case 0x11: + return msa_3R_11(cins, wd, ws); + + case 0x12: + return msa_3R_12(cins, wd, ws); + + case 0x13: + return msa_3R_13(cins, wd, ws); + + case 0x14: + return msa_3R_14(cins, wd, ws); + + case 0x15: + return msa_3R_15(cins, wd, ws); + + case 0x19: + return msa_ELM(cins, wd, ws); + + case 0x1A: + return msa_3R_1A(cins, wd, ws); + + case 0x1B: + return msa_3R_1B(cins, wd, ws); + + case 0x1C: + return msa_3R_1C(cins, wd, ws); + + case 0x1E: + if ((cins & 0x03000000) == 0) + return msa_VEC(cins, wd, ws); + else if ((cins & 0x00200000) == 0) + return msa_2R(cins, wd, ws); + else + return msa_2RF(cins, wd, ws); + + case 0x20: + return msa_MI10_load(cins, wd, ws); + + case 0x24: + return msa_MI10_store(cins, wd, ws); + } + + return -1; +} + /*------------------------------------------------------------*/ /*--- Disassemble a single instruction ---*/ /*------------------------------------------------------------*/ @@ -12380,6 +26066,129 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *, break; } else goto decode_failure; + } else if (fmt >= 0x1c && has_msa) { /* BNZ.df */ + Int df = fmt & 3; + t0 = newTemp(Ity_I32); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = newTemp(Ity_V128); + assign(t1, getWReg(ft)); + assign(t2, binop(Iop_64HLtoV128, mkU64(0), mkU64(0))); + + switch (df) { + case 0x00: { /* BNZ.B */ + DIP("BNZ.B w%d, %d", ft, imm); + assign(t3, binop(Iop_CmpEQ8x16, mkexpr(t1), mkexpr(t2))); + break; + } + + case 0x01: { /* BNZ.H */ + DIP("BNZ.H w%d, %d", ft, imm); + assign(t3, binop(Iop_CmpEQ16x8, mkexpr(t1), mkexpr(t2))); + break; + } + + case 0x02: { /* BNZ.W */ + DIP("BNZ.W w%d, %d", ft, imm); + assign(t3, binop(Iop_CmpEQ32x4, mkexpr(t1), mkexpr(t2))); + break; + } + + case 0x03: { /* BNZ.D */ + DIP("BNZ.D w%d, %d", ft, imm); + assign(t3, binop(Iop_CmpEQ64x2, mkexpr(t1), mkexpr(t2))); + break; + } + } + + assign(t0, + binop(Iop_Or32, + binop(Iop_Or32, + unop(Iop_V128to32, mkexpr(t3)), + unop(Iop_64HIto32, unop(Iop_V128to64, mkexpr(t3)))), + binop(Iop_Or32, + unop(Iop_64to32, + unop(Iop_V128HIto64, mkexpr(t3))), + unop(Iop_64HIto32, + unop(Iop_V128HIto64, mkexpr(t3)))))); + dis_branch(False, + binop(Iop_CmpEQ32, mkexpr(t0), mkU32(0)), imm, &bstmt); + } else if (fmt == 0x0F && has_msa) { /* BNZ.V */ + t0 = newTemp(Ity_I32); + t1 = newTemp(Ity_V128); + assign(t1, getWReg(ft)); + assign(t0, + binop(Iop_Or32, + binop(Iop_Or32, + unop(Iop_V128to32, mkexpr(t1)), + unop(Iop_64HIto32, unop(Iop_V128to64, mkexpr(t1)))), + binop(Iop_Or32, + unop(Iop_64to32, unop(Iop_V128HIto64, mkexpr(t1))), + unop(Iop_64HIto32, + unop(Iop_V128HIto64, mkexpr(t1)))))); + dis_branch(False, + binop(Iop_CmpNE32, mkexpr(t0), mkU32(0)), imm, &bstmt); + } else if (fmt >= 0x18 && has_msa) { /* BZ.df */ + Int df = fmt & 3; + t0 = newTemp(Ity_I32); + t1 = newTemp(Ity_V128); + t2 = newTemp(Ity_V128); + t3 = 
newTemp(Ity_V128); + assign(t1, getWReg(ft)); + assign(t2, binop(Iop_64HLtoV128, mkU64(0), mkU64(0))); + + switch (df) { + case 0x00: { /* BZ.B */ + DIP("BZ.B w%d, %d", ft, imm); + assign(t3, binop(Iop_CmpEQ8x16, mkexpr(t1), mkexpr(t2))); + break; + } + + case 0x01: { /* BZ.H */ + DIP("BZ.H w%d, %d", ft, imm); + assign(t3, binop(Iop_CmpEQ16x8, mkexpr(t1), mkexpr(t2))); + break; + } + + case 0x02: { /* BZ.W */ + DIP("BZ.W w%d, %d", ft, imm); + assign(t3, binop(Iop_CmpEQ32x4, mkexpr(t1), mkexpr(t2))); + break; + } + + case 0x03: { /* BZ.D */ + DIP("BZ.D w%d, %d", ft, imm); + assign(t3, binop(Iop_CmpEQ64x2, mkexpr(t1), mkexpr(t2))); + break; + } + } + + assign(t0, + binop(Iop_Or32, + binop(Iop_Or32, + unop(Iop_V128to32, mkexpr(t3)), + unop(Iop_64HIto32, unop(Iop_V128to64, mkexpr(t3)))), + binop(Iop_Or32, + unop(Iop_64to32, unop(Iop_V128HIto64, mkexpr(t3))), + unop(Iop_64HIto32, + unop(Iop_V128HIto64, mkexpr(t3)))))); + dis_branch(False, + binop(Iop_CmpNE32, mkexpr(t0), mkU32(0)), imm, &bstmt); + } else if (fmt == 0x0B && has_msa) { /* BZ.V */ + t0 = newTemp(Ity_I32); + t1 = newTemp(Ity_V128); + assign(t1, getWReg(ft)); + assign(t0, + binop(Iop_Or32, + binop(Iop_Or32, + unop(Iop_V128to32, mkexpr(t1)), + unop(Iop_64HIto32, unop(Iop_V128to64, mkexpr(t1)))), + binop(Iop_Or32, + unop(Iop_64to32, unop(Iop_V128HIto64, mkexpr(t1))), + unop(Iop_64HIto32, + unop(Iop_V128HIto64, mkexpr(t1)))))); + dis_branch(False, + binop(Iop_CmpEQ32, mkexpr(t0), mkU32(0)), imm, &bstmt); } else { switch (function) { case 0x4: { /* SQRT.fmt */ @@ -13676,47 +27485,45 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *, DIP("madd.s f%u, f%u, f%u, f%u", fd, fmt, fs, ft); IRExpr *rm = get_IR_roundingmode(); t1 = newTemp(Ity_F32); - assign(t1, qop(Iop_MAddF32, rm, - getLoFromF64(tyF, getFReg(fmt)), - getLoFromF64(tyF, getFReg(fs)), - getLoFromF64(tyF, getFReg(ft)))); + assign(t1, triop(Iop_AddF32, rm, getLoFromF64(tyF, getFReg(fmt)), + triop(Iop_MulF32, rm, getLoFromF64(tyF, getFReg(fs)), + getLoFromF64(tyF, getFReg(ft))))); putFReg(fd, mkWidenFromF32(tyF, mkexpr(t1))); break; /* MADD.S */ } case 0x21: { /* MADD.D */ DIP("madd.d f%u, f%u, f%u, f%u", fd, fmt, fs, ft); IRExpr *rm = get_IR_roundingmode(); - putDReg(fd, qop(Iop_MAddF64, rm, getDReg(fmt), getDReg(fs), - getDReg(ft))); + putDReg(fd, triop(Iop_AddF64, rm, getDReg(fmt), + triop(Iop_MulF64, rm, getDReg(fs), + getDReg(ft)))); break; /* MADD.D */ } case 0x28: { /* MSUB.S */ DIP("msub.s f%u, f%u, f%u, f%u", fd, fmt, fs, ft); IRExpr *rm = get_IR_roundingmode(); t1 = newTemp(Ity_F32); - assign(t1, qop(Iop_MSubF32, rm, - getLoFromF64(tyF, getFReg(fmt)), - getLoFromF64(tyF, getFReg(fs)), - getLoFromF64(tyF, getFReg(ft)))); + assign(t1, triop(Iop_SubF32, rm, + triop(Iop_MulF32, rm, getLoFromF64(tyF, getFReg(fs)), + getLoFromF64(tyF, getFReg(ft))), + getLoFromF64(tyF, getFReg(fmt)))); putFReg(fd, mkWidenFromF32(tyF, mkexpr(t1))); break; /* MSUB.S */ } case 0x29: { /* MSUB.D */ DIP("msub.d f%u, f%u, f%u, f%u", fd, fmt, fs, ft); IRExpr *rm = get_IR_roundingmode(); - putDReg(fd, qop(Iop_MSubF64, rm, getDReg(fmt), getDReg(fs), - getDReg(ft))); + putDReg(fd, triop(Iop_SubF64, rm, triop(Iop_MulF64, rm, getDReg(fs), + getDReg(ft)), getDReg(fmt))); break; /* MSUB.D */ } case 0x30: { /* NMADD.S */ DIP("nmadd.s f%u, f%u, f%u, f%u", fd, fmt, fs, ft); IRExpr *rm = get_IR_roundingmode(); t1 = newTemp(Ity_F32); - assign(t1, qop(Iop_MAddF32, rm, - getLoFromF64(tyF, getFReg(fmt)), - getLoFromF64(tyF, getFReg(fs)), - getLoFromF64(tyF, getFReg(ft)))); - + assign(t1, 
triop(Iop_AddF32, rm, getLoFromF64(tyF, getFReg(fmt)), + triop(Iop_MulF32, rm, getLoFromF64(tyF, getFReg(fs)), + getLoFromF64(tyF, getFReg(ft))))); putFReg(fd, mkWidenFromF32(tyF, unop(Iop_NegF32, mkexpr(t1)))); break; /* NMADD.S */ } @@ -13724,8 +27531,9 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *, DIP("nmadd.d f%u, f%u, f%u, f%u", fd, fmt, fs, ft); IRExpr *rm = get_IR_roundingmode(); t1 = newTemp(Ity_F64); - assign(t1, qop(Iop_MAddF64, rm, getDReg(fmt), getDReg(fs), - getDReg(ft))); + assign(t1, triop(Iop_AddF64, rm, getDReg(fmt), + triop(Iop_MulF64, rm, getDReg(fs), + getDReg(ft)))); putDReg(fd, unop(Iop_NegF64, mkexpr(t1))); break; /* NMADD.D */ } @@ -13733,11 +27541,10 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *, DIP("nmsub.s f%u, f%u, f%u, f%u", fd, fmt, fs, ft); IRExpr *rm = get_IR_roundingmode(); t1 = newTemp(Ity_F32); - assign(t1, qop(Iop_MSubF32, rm, - getLoFromF64(tyF, getFReg(fmt)), - getLoFromF64(tyF, getFReg(fs)), - getLoFromF64(tyF, getFReg(ft)))); - + assign(t1, triop(Iop_SubF32, rm, + triop(Iop_MulF32, rm, getLoFromF64(tyF, getFReg(fs)), + getLoFromF64(tyF, getFReg(ft))), + getLoFromF64(tyF, getFReg(fmt)))); putFReg(fd, mkWidenFromF32(tyF, unop(Iop_NegF32, mkexpr(t1)))); break; /* NMSUBB.S */ } @@ -13745,8 +27552,8 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *, DIP("nmsub.d f%u, f%u, f%u, f%u", fd, fmt, fs, ft); IRExpr *rm = get_IR_roundingmode(); t1 = newTemp(Ity_F64); - assign(t1, qop(Iop_MSubF64, rm, getDReg(fmt), getDReg(fs), - getDReg(ft))); + assign(t1, triop(Iop_SubF64, rm, triop(Iop_MulF64, rm, getDReg(fs), + getDReg(ft)), getDReg(fmt))); putDReg(fd, unop(Iop_NegF64, mkexpr(t1))); break; /* NMSUBB.D */ } @@ -16069,6 +29876,38 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *, } } + case 0x05: /* LSA */ + if (has_msa) { + UInt imm2 = (imm & 0xC0) >> 6; + DIP("lsa r%u, r%u, r%u, imm: 0x%x", rd, rs, rt, imm2); + if (mode64) { + putIReg(rd, + unop(Iop_32Sto64, + binop(Iop_Add32, + binop(Iop_Shl32, + unop(Iop_64to32, getIReg(rs)), + mkU8(imm2 + 1)), + unop(Iop_64to32, getIReg(rt))))); + } else { + putIReg(rd, + binop(Iop_Add32, + binop(Iop_Shl32, getIReg(rs), mkU8(imm2 + 1)), + getIReg(rt))); + } + } else { + ILLEGAL_INSTRUCTON; + } + break; + case 0x15: { /* DLSA */ + UInt imm2 = (imm & 0xC0) >> 6; + DIP("dlsa r%u, r%u, r%u, imm: 0x%x", rd, rs, rt, imm2); + putIReg(rd, + binop(Iop_Add64, + binop(Iop_Shl64, getIReg(rs), mkU8(imm2 + 1)), + getIReg(rt))); + break; + } + case 0x0D: /* BREAK */ DIP("break 0x%x", trap_code); if (mode64) @@ -17165,6 +31004,20 @@ static DisResult disInstr_MIPS_WRK ( Bool(*resteerOkFn) (/*opaque */void *, goto decode_failure; } + case 0x1E: /* MIPS MSA (SIMD) */ + if (has_msa) { + Int retVal = disMSAInstr_MIPS_WRK(cins); + if (retVal == 0) { + break; + } else if (retVal == -2) { + ILLEGAL_INSTRUCTON + break; + } + } + vex_printf("Error occurred while trying to decode MIPS MSA " + "instruction.\nYour platform probably doesn't support " + "MIPS MSA (SIMD) ASE.\n"); + default: goto decode_failure; @@ -17296,6 +31149,7 @@ DisResult disInstr_MIPS( IRSB* irsb_IN, mode64 = guest_arch != VexArchMIPS32; fp_mode64 = abiinfo->guest_mips_fp_mode64; + has_msa = VEX_MIPS_PROC_MSA(archinfo->hwcaps); vassert(VEX_MIPS_HOST_FP_MODE(archinfo->hwcaps) >= fp_mode64); diff --git a/VEX/priv/host_mips_defs.c b/VEX/priv/host_mips_defs.c index 35a293b722..f62a410d11 100644 --- a/VEX/priv/host_mips_defs.c +++ b/VEX/priv/host_mips_defs.c
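[A note on enabling the new code paths: every MSA path in the guest decoder above is gated on has_msa, i.e. on VEX_MIPS_PROC_MSA(archinfo->hwcaps). Valgrind fills hwcaps in from the detected host CPU; a standalone VEX embedder would have to advertise the capability itself, roughly as in this sketch. This is an assumption -- the VEX_MIPS_PROC_MSA() definition is not part of the hunks shown, and that the P5600 PRID satisfies it is inferred only from that CPU implementing MSA:

    VexArchInfo vai;
    LibVEX_default_VexArchInfo(&vai);
    /* assumed: P5600 implements MSA and passes the VEX_MIPS_PROC_MSA() test */
    vai.hwcaps = VEX_PRID_COMP_MIPS | VEX_PRID_IMP_P5600;
]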
@@ -91,6 +91,25 @@ const RRegUniverse* getRRegUniverse_MIPS ( Bool mode64 ) ru->regs[ru->size++] = hregMIPS_F30(mode64); ru->allocable_end[(mode64) ? HRcFlt64 : HRcFlt32] = ru->size - 1; + ru->allocable_start[HRcVec128] = ru->size; + ru->regs[ru->size++] = hregMIPS_W16(mode64); + ru->regs[ru->size++] = hregMIPS_W17(mode64); + ru->regs[ru->size++] = hregMIPS_W18(mode64); + ru->regs[ru->size++] = hregMIPS_W19(mode64); + ru->regs[ru->size++] = hregMIPS_W20(mode64); + ru->regs[ru->size++] = hregMIPS_W21(mode64); + ru->regs[ru->size++] = hregMIPS_W22(mode64); + ru->regs[ru->size++] = hregMIPS_W23(mode64); + ru->regs[ru->size++] = hregMIPS_W24(mode64); + ru->regs[ru->size++] = hregMIPS_W25(mode64); + ru->regs[ru->size++] = hregMIPS_W26(mode64); + ru->regs[ru->size++] = hregMIPS_W27(mode64); + ru->regs[ru->size++] = hregMIPS_W28(mode64); + ru->regs[ru->size++] = hregMIPS_W29(mode64); + ru->regs[ru->size++] = hregMIPS_W30(mode64); + ru->regs[ru->size++] = hregMIPS_W31(mode64); + ru->allocable_end[HRcVec128] = ru->size - 1; + if (!mode64) { /* Fake double floating point */ ru->allocable_start[HRcFlt64] = ru->size; @@ -157,6 +176,13 @@ UInt ppHRegMIPS(HReg reg, Bool mode64) "$d8", "$d9", "$d10", "$d11", "$d12", "$d13", "$d14", "$d15", }; + static const HChar *fvec128_names[32] + = { "$w0", "$w1", "$w2", "$w3", "$w4", "$w5", "$w6", "$w7", + "$w8", "$w9", "$w10", "$w11", "$w12", "$w13", "$w14", "$w15", + "$w16", "$w17", "$w18", "$w19", "$w20", "$w21", "$w22", "$w23", + "$w24", "$w25", "$w26", "$w27", "$w28", "$w29", "$w30", "$w31" + }; + /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { return ppHReg(reg); @@ -164,7 +190,8 @@ UInt ppHRegMIPS(HReg reg, Bool mode64) /* But specific for real regs. */ vassert(hregClass(reg) == HRcInt32 || hregClass(reg) == HRcInt64 || - hregClass(reg) == HRcFlt32 || hregClass(reg) == HRcFlt64); + hregClass(reg) == HRcFlt32 || hregClass(reg) == HRcFlt64 || + hregClass(reg) == HRcVec128); /* But specific for real regs.
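Note in passing that the sixteen allocable vector registers registered above carry odd hardware encodings -- hregMIPS_W16 encodes $w1, W17 encodes $w3, and so on up to W31/$w31 (see the VEC(..) calls in host_mips_defs.h) -- so fvec128_names[] prints hregMIPS_W16 as "$w1". Since the low 64 bits of MSA register $wN alias FPU register $fN, the odd encodings appear chosen to keep the vector registers clear of the even-numbered float registers already made allocable above; that rationale is inferred, not stated anywhere in the patch.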
*/ switch (hregClass(reg)) { @@ -184,6 +211,10 @@ UInt ppHRegMIPS(HReg reg, Bool mode64) r = hregEncoding(reg); vassert(r >= 0 && r < 32); return vex_printf("%s", freg64_names[r]); + case HRcVec128: + r = hregEncoding(reg); + vassert(r >= 0 && r < 32); + return vex_printf("%s", fvec128_names[r]); default: vpanic("ppHRegMIPS"); break; @@ -754,6 +785,466 @@ const HChar *showMIPSMaccOp(MIPSMaccOp op, Bool variable) return ret; } +HChar showMsaDF(MSADF df) { + switch (df) { + case MSA_B: + return 'b'; + + case MSA_H: + return 'h'; + + case MSA_W: + return 'w'; + + case MSA_D: + return 'd'; + } + + return '?'; +} + +HChar showMsaDFF(MSADFFlx df, int op) { + switch (df) { + case MSA_F_DW: + if (op == MSA_MUL_Q || op == MSA_MULR_Q || op == MSA_FEXDO) return 'w'; + else return 'd'; + + case MSA_F_WH: + if (op == MSA_MUL_Q || op == MSA_MULR_Q || op == MSA_FEXDO) return 'h'; + else return 'w'; + } + + return '?'; +} + +const HChar *showMsaMI10op(MSAMI10Op op) { + const HChar *ret; + + switch (op) { + case MSA_LD: + ret = "ld"; + break; + + case MSA_ST: + ret = "st"; + break; + + default: + vpanic("showMsaMI10op"); + break; + } + + return ret; +} + +const HChar *showMsaElmOp(MSAELMOp op) { + const HChar *ret; + + switch (op) { + case MSA_MOVE: + ret = "move.v"; + break; + + case MSA_INSERT: + ret = "insert"; + break; + + case MSA_COPY_U: + ret = "copy_u"; + break; + + case MSA_COPY_S: + ret = "copy_s"; + break; + + case MSA_SLDI: + ret = "sldi"; + break; + + case MSA_INSVE: + ret = "insve"; + break; + + case MSA_CFCMSA: + ret = "cfcmsa"; + break; + + case MSA_CTCMSA: + ret = "ctcmsa"; + break; + + default: + vpanic("showMsaElmOp"); + break; + } + + return ret; +} + +const HChar *showMsa2ROp(MSA2ROp op) { + const HChar *ret; + + switch (op) { + case MSA_NLZC: + ret = "nlzc"; + break; + + case MSA_NLOC: + ret = "nloc"; + break; + + case MSA_FILL: + ret = "fill"; + break; + + case MSA_PCNT: + ret = "pcnt"; + break; + + default: + vpanic("showMsa2ROp"); + break; + } + + return ret; +} + +const HChar *showMsa2RFOp(MSA2RFOp op) { + const HChar *ret; + + switch (op) { + case MSA_FTRUNC_S: + ret = "ftrunc_s"; + break; + + case MSA_FTRUNC_U: + ret = "ftrunc_u"; + break; + + case MSA_FFINT_S: + ret = "ffint_s"; + break; + + case MSA_FFINT_U: + ret = "ffint_u"; + break; + + case MSA_FSQRT: + ret = "fsqrt"; + break; + + case MSA_FRSQRT: + ret = "frsqrt"; + break; + + case MSA_FRCP: + ret = "frcp"; + break; + + case MSA_FEXUPR: + ret = "fexupr"; + break; + + case MSA_FTINT_U: + ret = "ftint_u"; + break; + + case MSA_FTINT_S: + ret = "ftint_s"; + break; + + case MSA_FLOG2: + ret = "flog2"; + break; + + default: + vpanic("showMsa2RFOp"); + break; + } + + return ret; +} + +const HChar *showMsa3ROp(MSA3ROp op) { + const HChar *ret; + + switch (op) { + case MSA_ADDV: + ret = "addv"; + break; + + case MSA_ADD_A: + ret = "add_a"; + break; + + case MSA_SUBV: + ret = "subv"; + break; + + case MSA_ADDS_S: + ret = "adds_s"; + break; + + case MSA_ADDS_U: + ret = "adds_u"; + break; + + case MSA_SUBS_S: + ret = "subs_s"; + break; + + case MSA_SUBS_U: + ret = "subs_u"; + break; + + case MSA_MAX_S: + ret = "max_s"; + break; + + case MSA_MAX_U: + ret = "max_u"; + break; + + case MSA_MIN_S: + ret = "min_s"; + break; + + case MSA_MIN_U: + ret = "min_u"; + break; + + case MSA_SLL: + ret = "sll"; + break; + + case MSA_SRL: + ret = "srl"; + break; + + case MSA_SRA: + ret = "sra"; + break; + + case MSA_CEQ: + ret = "ceq"; + break; + + case MSA_CLT_S: + ret = "clt_s"; + break; + + case MSA_CLT_U: + ret = "clt_u"; + break; + + case 
MSA_ILVL: + ret = "ilvl"; + break; + + case MSA_ILVR: + ret = "ilvr"; + break; + + case MSA_ILVEV: + ret = "ilvev"; + break; + + case MSA_ILVOD: + ret = "ilvod"; + break; + + case MSA_PCKEV: + ret = "pckev"; + break; + + case MSA_PCKOD: + ret = "pckod"; + break; + + case MSA_AVER_S: + ret = "aver_s"; + break; + + case MSA_AVER_U: + ret = "aver_u"; + break; + + case MSA_SLD: + ret = "sld"; + break; + + case MSA_SPLAT: + ret = "splat"; + break; + + case MSA_MULV: + ret = "mulv"; + break; + + case MSA_DIVS: + ret = "divs"; + break; + + case MSA_DIVU: + ret = "divu"; + break; + + case MSA_VSHF: + ret = "vshf"; + break; + + default: + vpanic("showMsa3ROp"); + break; + } + + return ret; +} + +const HChar *showMsaVecOp(MSAVECOp op) { + const HChar *ret; + + switch (op) { + case MSA_ANDV: + ret = "and.v"; + break; + + case MSA_ORV: + ret = "or.v"; + break; + + case MSA_XORV: + ret = "xor.v"; + break; + + case MSA_NORV: + ret = "nor.v"; + break; + + default: + vpanic("showMsaVecOp"); + break; + } + + return ret; +} + +const HChar *showMsaBitOp(MSABITOp op) { + const HChar *ret; + + switch (op) { + case MSA_SLLI: + ret = "slli"; + break; + + case MSA_SRAI: + ret = "srai"; + break; + + case MSA_SRLI: + ret = "srli"; + break; + + case MSA_SAT_S: + ret = "sat_s"; + break; + + case MSA_SRARI: + ret = "srari"; + break; + + default: + vpanic("showMsaBitOp"); + break; + } + + return ret; +} + +const HChar *showMsa3RFOp(MSA3RFOp op) { + const HChar *ret; + + switch (op) { + case MSA_FADD: + ret = "fadd"; + break; + + case MSA_FSUB: + ret = "fsub"; + break; + + case MSA_FMUL: + ret = "fmul"; + break; + + case MSA_FDIV: + ret = "fdiv"; + break; + + case MSA_MUL_Q: + ret = "mul_q"; + break; + + case MSA_MULR_Q: + ret = "mulr_q"; + break; + + case MSA_FCEQ: + ret = "fceq"; + break; + + case MSA_FCLT: + ret = "fclt"; + break; + + case MSA_FCUN: + ret = "fcun"; + break; + + case MSA_FEXP2: + ret = "fexp2"; + break; + + case MSA_FMIN: + ret = "fmin"; + break; + + case MSA_FMIN_A: + ret = "fmin_a"; + break; + + case MSA_FMAX: + ret = "fmax"; + break; + + case MSA_FMADD: + ret = "fmadd"; + break; + + case MSA_FMSUB: + ret = "fmsub"; + break; + + case MSA_FEXDO: + ret = "fexdo"; + break; + + case MSA_FTQ: + ret = "ftq"; + break; + + case MSA_FCLE: + ret = "fcle"; + break; + + default: + vpanic("showMsa3RFOp"); + break; + } + + return ret; +} + MIPSInstr *MIPSInstr_LI(HReg dst, ULong imm) { MIPSInstr *i = LibVEX_Alloc_inline(sizeof(MIPSInstr)); @@ -1188,6 +1679,93 @@ MIPSInstr* MIPSInstr_ProfInc ( void ) { return i; } + +MIPSInstr* MIPSInstr_MsaMi10(MSAMI10Op op, UInt s10, HReg rs, HReg wd, + MSADF df) { + MIPSInstr* i = LibVEX_Alloc_inline(sizeof(MIPSInstr)); + i->tag = Msa_MI10; + i->Min.MsaMi10.op = op; + i->Min.MsaMi10.s10 = s10; + i->Min.MsaMi10.rs = rs; + i->Min.MsaMi10.wd = wd; + i->Min.MsaMi10.df = df; + return i; +} + +MIPSInstr* MIPSInstr_MsaElm(MSAELMOp op, HReg ws, HReg wd, UInt dfn ) { + MIPSInstr* i = LibVEX_Alloc_inline(sizeof(MIPSInstr)); + i->tag = Msa_ELM; + i->Min.MsaElm.op = op; + i->Min.MsaElm.ws = ws; + i->Min.MsaElm.wd = wd; + i->Min.MsaElm.dfn = dfn; + return i; +} + +MIPSInstr* MIPSInstr_Msa2R(MSA2ROp op, MSADF df, HReg ws, HReg wd ) { + MIPSInstr* i = LibVEX_Alloc_inline(sizeof(MIPSInstr)); + i->tag = Msa_2R; + i->Min.Msa2R.op = op; + i->Min.Msa2R.df = df; + i->Min.Msa2R.ws = ws; + i->Min.Msa2R.wd = wd; + return i; +} + +MIPSInstr* MIPSInstr_Msa3R(MSA3ROp op, MSADF df, HReg wd, HReg ws, HReg wt) { + MIPSInstr* i = LibVEX_Alloc_inline(sizeof(MIPSInstr)); + i->tag = Msa_3R;
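+ /* wd is the destination and ws/wt the source vector registers; df + selects the lane width. For MSA_SLD and MSA_SPLAT the emitter reads + wt as an integer register, and for MSA_SLD/MSA_VSHF wd is a modified + (read-write) operand -- see getRegUsage_MIPSInstr below. */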
+ i->Min.Msa3R.op = op; + i->Min.Msa3R.df = df; + i->Min.Msa3R.wd = wd; + i->Min.Msa3R.wt = wt; + i->Min.Msa3R.ws = ws; + return i; +} + +MIPSInstr* MIPSInstr_MsaVec(MSAVECOp op, HReg wd, HReg ws, HReg wt) { + MIPSInstr* i = LibVEX_Alloc_inline(sizeof(MIPSInstr)); + i->tag = Msa_VEC; + i->Min.MsaVec.op = op; + i->Min.MsaVec.wd = wd; + i->Min.MsaVec.wt = wt; + i->Min.MsaVec.ws = ws; + return i; +} + +MIPSInstr* MIPSInstr_MsaBit(MSABITOp op, MSADF df, UChar ms, HReg ws, HReg wd) { + MIPSInstr* i = LibVEX_Alloc_inline(sizeof(MIPSInstr)); + i->tag = Msa_BIT; + i->Min.MsaBit.op = op; + i->Min.MsaBit.df = df; + i->Min.MsaBit.ws = ws; + i->Min.MsaBit.wd = wd; + i->Min.MsaBit.ms = ms; + return i; +} + +MIPSInstr* MIPSInstr_Msa3RF(MSA3RFOp op, MSADFFlx df, HReg wd, HReg ws, + HReg wt) { + MIPSInstr* i = LibVEX_Alloc_inline(sizeof(MIPSInstr)); + i->tag = Msa_3RF; + i->Min.Msa3RF.op = op; + i->Min.Msa3RF.df = df; + i->Min.Msa3RF.wd = wd; + i->Min.Msa3RF.wt = wt; + i->Min.Msa3RF.ws = ws; + return i; +} + +MIPSInstr* MIPSInstr_Msa2RF(MSA2RFOp op, MSADFFlx df, HReg wd, HReg ws) { + MIPSInstr *i = LibVEX_Alloc_inline(sizeof(MIPSInstr)); + i->tag = Msa_2RF; + i->Min.Msa2RF.op = op; + i->Min.Msa2RF.df = df; + i->Min.Msa2RF.wd = wd; + i->Min.Msa2RF.ws = ws; + return i; +} + /* -------- Pretty Print instructions ------------- */ static void ppLoadImm(HReg dst, ULong imm, Bool mode64) { @@ -1196,6 +1774,29 @@ static void ppLoadImm(HReg dst, ULong imm, Bool mode64) vex_printf(",0x%016llx", imm); } +static void MSAdfn(UInt dfn, MSADF* df, UInt* n) { + if ((dfn & 0x3e) == MSA_DFN_D) { + *df = MSA_D; + *n = dfn & 1; + return; + } + + if ((dfn & 0x3c) == MSA_DFN_W) { + *df = MSA_W; + *n = dfn & 3; + return; + } + + if ((dfn & 0x38) == MSA_DFN_H) { + *df = MSA_H; + *n = dfn & 7; + return; + } + + *df = MSA_B; + *n = dfn & 15; +} + void ppMIPSInstr(const MIPSInstr * i, Bool mode64) { switch (i->tag) { @@ -1587,6 +2188,162 @@ void ppMIPSInstr(const MIPSInstr * i, Bool mode64) "addu $8, $8, $1; " "sw $8, 4($9); " ); return; + case Msa_MI10: { + Int imm = (i->Min.MsaMi10.s10 << 22) >> 22; + + switch (i->Min.MsaMi10.df) { + case MSA_B: + break; + + case MSA_H: + imm <<= 1; + break; + + case MSA_W: + imm <<= 2; + break; + + case MSA_D: + imm <<= 3; + break; + } + + vex_printf("%s.%c ", showMsaMI10op(i->Min.MsaMi10.op), + showMsaDF(i->Min.MsaMi10.df)); + ppHRegMIPS(i->Min.MsaMi10.wd, mode64); + vex_printf(", %d(", imm); + ppHRegMIPS(i->Min.MsaMi10.rs, mode64); + vex_printf(")"); + return; + } + + case Msa_ELM: + switch (i->Min.MsaElm.op) { + case MSA_MOVE: + vex_printf("move.v "); + ppHRegMIPS(i->Min.MsaElm.wd, mode64); + vex_printf(", "); + ppHRegMIPS(i->Min.MsaElm.ws, mode64); + break; + + case MSA_SLDI: { + MSADF df; + UInt n; + MSAdfn(i->Min.MsaElm.dfn, &df, &n); + vex_printf("%s.%c ", showMsaElmOp(i->Min.MsaElm.op), + showMsaDF(df)); + ppHRegMIPS(i->Min.MsaElm.wd, mode64); + vex_printf(", "); + ppHRegMIPS(i->Min.MsaElm.ws, mode64); + vex_printf("[%u]", n); + break; + } + + case MSA_INSVE: { + MSADF df; + UInt n; + MSAdfn(i->Min.MsaElm.dfn, &df, &n); + vex_printf("%s.%c ", showMsaElmOp(i->Min.MsaElm.op), + showMsaDF(df)); + ppHRegMIPS(i->Min.MsaElm.wd, mode64); + vex_printf("[%u], ", n); + ppHRegMIPS(i->Min.MsaElm.ws, mode64); + vex_printf("[0]"); + break; + } + + case MSA_COPY_S: + case MSA_COPY_U: { + MSADF df; + UInt n; + MSAdfn(i->Min.MsaElm.dfn, &df, &n); + vex_printf("%s.%c ", showMsaElmOp(i->Min.MsaElm.op), + showMsaDF(df)); + ppHRegMIPS(i->Min.MsaElm.wd, mode64); + vex_printf(", "); + ppHRegMIPS(i->Min.MsaElm.ws, mode64); + vex_printf("[%u]", n);
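+ /* Worked example of the dfn decoding: dfn == (MSA_DFN_H | 5) fails + the D and W tests in MSAdfn above but satisfies + (dfn & 0x38) == MSA_DFN_H, so it decodes as df == MSA_H, n == 5, + and this case then prints, e.g., "copy_s.h ..., ...[5]". */ +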
break; + } + + case MSA_INSERT: { + MSADF df; + UInt n; + MSAdfn(i->Min.MsaElm.dfn, &df, &n); + vex_printf("%s.%c ", showMsaElmOp(i->Min.MsaElm.op), + showMsaDF(df)); + ppHRegMIPS(i->Min.MsaElm.wd, mode64); + vex_printf("[%u], ", n); + ppHRegMIPS(i->Min.MsaElm.ws, mode64); + break; + } + + case MSA_CFCMSA: + vex_printf("cfcmsa "); + ppHRegMIPS(i->Min.MsaElm.wd, mode64); + vex_printf(", $1"); + break; + + case MSA_CTCMSA: + vex_printf("ctcmsa $1, "); + ppHRegMIPS(i->Min.MsaElm.ws, mode64); + break; + } + + return; + + case Msa_3R: + vex_printf("%s.%c ", + showMsa3ROp(i->Min.Msa3R.op), showMsaDF(i->Min.Msa3R.df)); + ppHRegMIPS(i->Min.Msa3R.wd, mode64); + vex_printf(", "); + ppHRegMIPS(i->Min.Msa3R.ws, mode64); + vex_printf(", "); + ppHRegMIPS(i->Min.Msa3R.wt, mode64); + return; + + case Msa_2R: + vex_printf("%s.%c ", + showMsa2ROp(i->Min.Msa2R.op), showMsaDF(i->Min.Msa2R.df)); + ppHRegMIPS(i->Min.Msa2R.wd, mode64); + vex_printf(", "); + ppHRegMIPS(i->Min.Msa2R.ws, mode64); + return; + + case Msa_VEC: + vex_printf("%s ", showMsaVecOp(i->Min.MsaVec.op)); + ppHRegMIPS(i->Min.MsaVec.wd, mode64); + vex_printf(", "); + ppHRegMIPS(i->Min.MsaVec.ws, mode64); + vex_printf(", "); + ppHRegMIPS(i->Min.MsaVec.wt, mode64); + return; + + case Msa_BIT: + vex_printf("%s.%c ", showMsaBitOp(i->Min.MsaBit.op), + showMsaDF(i->Min.MsaBit.df)); + ppHRegMIPS(i->Min.MsaBit.wd, mode64); + vex_printf(", "); + ppHRegMIPS(i->Min.MsaBit.ws, mode64); + vex_printf(", %d ", i->Min.MsaBit.ms); + return; + + case Msa_3RF: + vex_printf("%s.%c ", showMsa3RFOp(i->Min.Msa3RF.op), + showMsaDFF(i->Min.Msa3RF.df, i->Min.Msa3RF.op)); + ppHRegMIPS(i->Min.Msa3RF.wd, mode64); + vex_printf(", "); + ppHRegMIPS(i->Min.Msa3RF.ws, mode64); + vex_printf(", "); + ppHRegMIPS(i->Min.Msa3RF.wt, mode64); + return; + + case Msa_2RF: + vex_printf("%s.%c ", showMsa2RFOp(i->Min.Msa2RF.op), + showMsaDFF(i->Min.Msa2RF.df, i->Min.Msa2RF.op)); + ppHRegMIPS(i->Min.Msa2RF.wd, mode64); + vex_printf(", "); + ppHRegMIPS(i->Min.Msa2RF.ws, mode64); + return; default: vpanic("ppMIPSInstr"); break; @@ -1659,6 +2416,84 @@ void getRegUsage_MIPSInstr(HRegUsage * u, const MIPSInstr * i, Bool mode64) addHRegUse(u, HRmRead, hregMIPS_LO(mode64)); addHRegUse(u, HRmWrite, i->Min.MfHL.dst); return; + case Msa_MI10: + addHRegUse(u, HRmRead, i->Min.MsaMi10.rs); + + switch (i->Min.MsaMi10.op) { + case MSA_LD: + addHRegUse(u, HRmWrite, i->Min.MsaMi10.wd); + break; + + case MSA_ST: + addHRegUse(u, HRmRead, i->Min.MsaMi10.wd); + break; + } + + return; + + case Msa_ELM: + if (LIKELY(i->Min.MsaElm.op != MSA_CFCMSA)) + addHRegUse(u, HRmRead, i->Min.MsaElm.ws); + + switch (i->Min.MsaElm.op) { + case MSA_COPY_S: + case MSA_COPY_U: + case MSA_MOVE: + case MSA_CFCMSA: + addHRegUse(u, HRmWrite, i->Min.MsaElm.wd); + break; + + case MSA_SLDI: + case MSA_INSERT: + case MSA_INSVE: + addHRegUse(u, HRmModify, i->Min.MsaElm.wd); + break; + case MSA_CTCMSA: + break; + } + + return; + + case Msa_3R: + addHRegUse(u, HRmRead, i->Min.Msa3R.ws); + addHRegUse(u, HRmRead, i->Min.Msa3R.wt); + + if (i->Min.Msa3R.op == MSA_SLD || + i->Min.Msa3R.op == MSA_VSHF) { + addHRegUse(u, HRmModify, i->Min.Msa3R.wd); + } else { + addHRegUse(u, HRmWrite, i->Min.Msa3R.wd); + } + + return; + + case Msa_2R: + addHRegUse(u, HRmWrite, i->Min.Msa2R.wd); + addHRegUse(u, HRmRead, i->Min.Msa2R.ws); + return; + + case Msa_VEC: + addHRegUse(u, HRmRead, i->Min.MsaVec.ws); + addHRegUse(u, HRmRead, i->Min.MsaVec.wt); + addHRegUse(u, HRmWrite, i->Min.MsaVec.wd); + return; + + case Msa_BIT: + addHRegUse(u, HRmRead, 
i->Min.MsaBit.ws); + addHRegUse(u, HRmWrite, i->Min.MsaBit.wd); + return; + + case Msa_3RF: + addHRegUse(u, HRmRead, i->Min.Msa3RF.ws); + addHRegUse(u, HRmRead, i->Min.Msa3RF.wt); + addHRegUse(u, HRmWrite, i->Min.Msa3RF.wd); + return; + + case Msa_2RF: + addHRegUse(u, HRmRead, i->Min.Msa2RF.ws); + addHRegUse(u, HRmWrite, i->Min.Msa2RF.wd); + return; + case Min_MtFCSR: addHRegUse(u, HRmRead, i->Min.MtFCSR.src); return; @@ -1890,12 +2725,55 @@ void mapRegs_MIPSInstr(HRegRemap * m, MIPSInstr * i, Bool mode64) mapReg(m, &i->Min.Div.srcL); mapReg(m, &i->Min.Div.srcR); return; + case Msa_MI10: + mapReg(m, &i->Min.MsaMi10.rs); + mapReg(m, &i->Min.MsaMi10.wd); + return; + + case Msa_ELM: + mapReg(m, &i->Min.MsaElm.ws); + mapReg(m, &i->Min.MsaElm.wd); + return; + + case Msa_2R: + mapReg(m, &i->Min.Msa2R.wd); + mapReg(m, &i->Min.Msa2R.ws); + return; + case Min_Call: { if (i->Min.Call.cond != MIPScc_AL) mapReg(m, &i->Min.Call.src); return; } + case Msa_3R: + mapReg(m, &i->Min.Msa3R.wt); + mapReg(m, &i->Min.Msa3R.ws); + mapReg(m, &i->Min.Msa3R.wd); + return; + + case Msa_VEC: + mapReg(m, &i->Min.MsaVec.wt); + mapReg(m, &i->Min.MsaVec.ws); + mapReg(m, &i->Min.MsaVec.wd); + return; + + case Msa_BIT: + mapReg(m, &i->Min.MsaBit.ws); + mapReg(m, &i->Min.MsaBit.wd); + return; + + case Msa_3RF: + mapReg(m, &i->Min.Msa3RF.wt); + mapReg(m, &i->Min.Msa3RF.ws); + mapReg(m, &i->Min.Msa3RF.wd); + return; + + case Msa_2RF: + mapReg(m, &i->Min.Msa2RF.ws); + mapReg(m, &i->Min.Msa2RF.wd); + return; + case Min_XDirect: mapRegs_MIPSAMode(m, i->Min.XDirect.amPC); return; @@ -2026,6 +2904,10 @@ void genSpill_MIPS( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2, HReg rreg, case HRcFlt64: *i1 = MIPSInstr_FpLdSt(False /*Store */ , 8, rreg, am); break; + case HRcVec128: + *i1 = MIPSInstr_MsaMi10(MSA_ST, (offsetB>>3), + GuestStatePointer(mode64), rreg, MSA_D); + break; default: ppHRegClass(hregClass(rreg)); vpanic("genSpill_MIPS: unimplemented regclass"); @@ -2058,6 +2940,10 @@ void genReload_MIPS( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2, HReg rreg, case HRcFlt64: *i1 = MIPSInstr_FpLdSt(True /*Load */ , 8, rreg, am); break; + case HRcVec128: + *i1 = MIPSInstr_MsaMi10(MSA_LD, (offsetB>>3), + GuestStatePointer(mode64), rreg, MSA_D); + break; default: ppHRegClass(hregClass(rreg)); vpanic("genReload_MIPS: unimplemented regclass"); @@ -2107,6 +2993,15 @@ inline static UInt dregNo(HReg r) return n; } +inline static UInt qregEnc ( HReg r ) +{ + UInt n; + vassert(!hregIsVirtual(r)); + n = hregEncoding(r); + vassert(n <= 31); + return n; +} + /* Emit 32bit instruction */ static UChar *emit32(UChar * p, UInt w32) { @@ -2211,6 +3106,108 @@ static UChar *mkFormS(UChar * p, UInt opc1, UInt rRD, UInt rRS, UInt rRT, return emit32(p, theInstr); } +static UChar *mkFormMI10(UChar * p, UInt msa, UInt s10, UInt rRS, UInt rWD, + UInt opc, UInt rDF) { + UInt theInstr; + vassert(rDF < 0x04); + vassert(opc < 0x10); + vassert(rWD < 0x20); + vassert(rRS < 0x20); + vassert(s10 < 0x400); + vassert(msa < 0x40); + theInstr = ((msa << 26) | (s10 << 16) | (rRS << 11) | (rWD << 6) | + ((opc << 2) | rDF)); + return emit32(p, theInstr); +} + +static UChar *mkFormELM(UChar *p, UInt msa, UInt op, UInt df, UInt ws, UInt wd, + UInt opc) { + UInt theInstr; + vassert(msa < 0x40); + vassert(ws < 0x20); + vassert(wd < 0x20); + vassert(opc < 0x40); + theInstr = ((msa << 26) | (op << 22) | (df << 16) | (ws << 11) | + ((wd << 6) | opc)); + return emit32(p, theInstr); +} + +static UChar *mkForm2R(UChar *p, UInt msa, UInt op, UInt df, UInt ws, UInt wd, + UInt opc) { + UInt 
theInstr; + theInstr = ((msa << 26) | (op << 18) | (df << 16) | (ws << 11) | + (wd << 6) | opc); + return emit32(p, theInstr); +} + +static UChar *mkForm3R(UChar *p, UInt op, UInt df, UInt wd, UInt ws, UInt wt) { + UInt theInstr; + vassert(op < 0x3800040); + vassert(df < 0x40); + vassert(wt < 0x20); + vassert(ws < 0x20); + vassert(wd < 0x20); + theInstr = OPC_MSA | op | (df << 21) | (wt << 16) | (ws << 11) | + (wd << 6); + return emit32(p, theInstr); +} + +static UChar *mkFormVEC(UChar *p, UInt op, UInt ws, UInt wt, UInt wd) { + UInt theInstr; + vassert(op < 0x20); + vassert(wt < 0x20); + vassert(ws < 0x20); + vassert(wd < 0x20); + theInstr = OPC_MSA | (op << 21) | (wt << 16) | (ws << 11) | + (wd << 6) | 0x1E; + return emit32(p, theInstr); +} + +static UChar *mkFormBIT(UChar *p, UInt op, UInt df, UInt ms, UInt ws, UInt wd) { + UInt theInstr; + UInt dfm = 0; + vassert(op < 0x3800040); + vassert(df < 0x40); + vassert(ms < 0x100); + vassert(ws < 0x20); + vassert(wd < 0x20); + + switch (df) { + case 0: + dfm |= 0x10; /* B: dfm = 0b1110mmm */ + /* fall through */ + case 1: + dfm |= 0x20; /* H: dfm = 0b110mmmm */ + /* fall through */ + case 2: + dfm |= 0x40; /* W: dfm = 0b10mmmmm; no case 3, so D keeps 0b0mmmmmm */ + } + + dfm |= ms; + theInstr = OPC_MSA | op | (dfm << 16) | (ws << 11) | + (wd << 6); + return emit32(p, theInstr); +} + +static UChar *mkForm3RF(UChar *p, UInt op, UInt df, UInt wd, UInt ws, UInt wt) { + UInt theInstr; + vassert(op < 0x3C0001D); + vassert(df < 0x40); + vassert(wt < 0x20); + vassert(ws < 0x20); + vassert(wd < 0x20); + theInstr = OPC_MSA | op | (df << 21) | (wt << 16) | (ws << 11) | + (wd << 6); + return emit32(p, theInstr); +} + +static UChar *mkForm2RF(UChar *p, UInt op, UInt df, UInt ws, UInt wd, + UInt opc) { + UInt theInstr; + theInstr = OPC_MSA | (op << 17) | (df << 16) | (ws << 11) | (wd << 6) | opc; + return emit32(p, theInstr); +} + static UChar *doAMode_IR(UChar * p, UInt opc1, UInt rSD, MIPSAMode * am, Bool mode64) { @@ -2563,6 +3560,7 @@ Int emit_MIPSInstr ( /*MB_MOD*/Bool* is_profInc, UChar *ptmp = p; vassert(nbuf >= 32); + switch (i->tag) { case Min_LI: p = mkLoadImm(p, iregNo(i->Min.LI.dst, mode64), i->Min.LI.imm, mode64); @@ -2676,6 +3674,131 @@ Int emit_MIPSInstr ( /*MB_MOD*/Bool* is_profInc, goto done; } + case Msa_MI10: { + UInt v_reg = qregEnc(i->Min.MsaMi10.wd); + UInt r_reg = iregNo(i->Min.MsaMi10.rs, mode64); + p = mkFormMI10(p, 0x1E, i->Min.MsaMi10.s10, r_reg, v_reg, i->Min.MsaMi10.op, + i->Min.MsaMi10.df); + goto done; + } + + case Msa_ELM: { + UInt v_src, v_dst; + + switch (i->Min.MsaElm.op) { + case MSA_INSERT: + v_src = iregNo(i->Min.MsaElm.ws, mode64); + v_dst = qregEnc(i->Min.MsaElm.wd); + break; + + case MSA_COPY_S: + case MSA_COPY_U: + v_src = qregEnc(i->Min.MsaElm.ws); + v_dst = iregNo(i->Min.MsaElm.wd, mode64); + break; + + case MSA_CTCMSA: + v_src = iregNo(i->Min.MsaElm.ws, mode64); + v_dst = 1; + break; + + case MSA_CFCMSA: + v_src = 1; + v_dst = iregNo(i->Min.MsaElm.wd, mode64); + break; + + default: + v_src = qregEnc(i->Min.MsaElm.ws); + v_dst = qregEnc(i->Min.MsaElm.wd); + break; + } + + switch (i->Min.MsaElm.op) { + case MSA_MOVE: + case MSA_CTCMSA: + case MSA_CFCMSA: + p = mkFormELM(p, 0x1E, 0, i->Min.MsaElm.op, v_src, v_dst, 25); + break; + + default: + p = mkFormELM(p, 0x1E, i->Min.MsaElm.op, i->Min.MsaElm.dfn, v_src, v_dst, 25); + break; + } + + goto done; + } + + case Msa_3R: { + UInt v_wt; + + switch (i->Min.Msa3R.op) { + case MSA_SLD: + case MSA_SPLAT: + v_wt = iregNo(i->Min.Msa3R.wt, mode64); + break; + + default: + v_wt = qregEnc(i->Min.Msa3R.wt); + break; + } + + UInt v_ws = qregEnc(i->Min.Msa3R.ws); + UInt v_wd = qregEnc(i->Min.Msa3R.wd); + p = mkForm3R(p,
i->Min.Msa3R.op, i->Min.Msa3R.df, v_wd, v_ws, v_wt); + goto done; + } + + case Msa_2R: { + UInt v_src; + UInt v_dst; + + switch (i->Min.Msa2R.op) { + case MSA_FILL: + v_src = iregNo(i->Min.Msa2R.ws, mode64); + v_dst = qregEnc(i->Min.Msa2R.wd); + break; + + default: + v_src = qregEnc(i->Min.Msa2R.ws); + v_dst = qregEnc(i->Min.Msa2R.wd); + break; + } + + p = mkForm2R(p, 0x1E, i->Min.Msa2R.op, i->Min.Msa2R.df, v_src, v_dst, 0x1E); + goto done; + } + + case Msa_2RF: { + UInt v_src = qregEnc(i->Min.Msa2RF.ws); + UInt v_dst = qregEnc(i->Min.Msa2RF.wd); + p = mkForm2RF(p, i->Min.Msa2RF.op, i->Min.Msa2RF.df, v_src, v_dst, 0x1E); + goto done; + } + + case Msa_VEC: { + UInt v_wt = qregEnc(i->Min.MsaVec.wt); + UInt v_ws = qregEnc(i->Min.MsaVec.ws); + UInt v_wd = qregEnc(i->Min.MsaVec.wd); + p = mkFormVEC(p, i->Min.MsaVec.op, v_wt, v_ws, v_wd); + goto done; + } + + case Msa_BIT: { + UInt v_ws = qregEnc(i->Min.MsaBit.ws); + UInt v_wd = qregEnc(i->Min.MsaBit.wd); + p = mkFormBIT(p, i->Min.MsaBit.op, i->Min.MsaBit.df, i->Min.MsaBit.ms, v_ws, + v_wd); + goto done; + } + + case Msa_3RF: { + UInt v_wt = qregEnc(i->Min.Msa3RF.wt); + UInt v_ws = qregEnc(i->Min.Msa3RF.ws); + UInt v_wd = qregEnc(i->Min.Msa3RF.wd); + p = mkForm3RF(p, i->Min.Msa3RF.op, i->Min.Msa3RF.df, v_wd, v_ws, v_wt); + goto done; + } + case Min_Shft: { MIPSRH *srcR = i->Min.Shft.srcR; Bool sz32 = i->Min.Shft.sz32; diff --git a/VEX/priv/host_mips_defs.h b/VEX/priv/host_mips_defs.h index fb681ac4de..c49def072c 100644 --- a/VEX/priv/host_mips_defs.h +++ b/VEX/priv/host_mips_defs.h @@ -52,6 +52,10 @@ mkHReg(False, HRcFlt64, \ (_enc), (_mode64) ? (_ix64) : (_ix32)) +#define VEC(_mode64, _enc, _ix64, _ix32) \ + mkHReg(False, HRcVec128, \ + (_enc), (_mode64) ? (_ix64) : (_ix32)) + ST_IN HReg hregMIPS_GPR16 ( Bool mode64 ) { return GPR(mode64, 16, 0, 0); } ST_IN HReg hregMIPS_GPR17 ( Bool mode64 ) { return GPR(mode64, 17, 1, 1); } ST_IN HReg hregMIPS_GPR18 ( Bool mode64 ) { return GPR(mode64, 18, 2, 2); } @@ -75,57 +79,79 @@ ST_IN HReg hregMIPS_F26 ( Bool mode64 ) { return FR (mode64, 26, 17, 17); } ST_IN HReg hregMIPS_F28 ( Bool mode64 ) { return FR (mode64, 28, 18, 18); } ST_IN HReg hregMIPS_F30 ( Bool mode64 ) { return FR (mode64, 30, 19, 19); } +ST_IN HReg hregMIPS_W16 ( Bool mode64 ) { return VEC(mode64, 1, 20, 20); } +ST_IN HReg hregMIPS_W17 ( Bool mode64 ) { return VEC(mode64, 3, 21, 21); } +ST_IN HReg hregMIPS_W18 ( Bool mode64 ) { return VEC(mode64, 5, 22, 22); } +ST_IN HReg hregMIPS_W19 ( Bool mode64 ) { return VEC(mode64, 7, 23, 23); } +ST_IN HReg hregMIPS_W20 ( Bool mode64 ) { return VEC(mode64, 9, 24, 24); } +ST_IN HReg hregMIPS_W21 ( Bool mode64 ) { return VEC(mode64, 11, 25, 25); } +ST_IN HReg hregMIPS_W22 ( Bool mode64 ) { return VEC(mode64, 13, 26, 26); } +ST_IN HReg hregMIPS_W23 ( Bool mode64 ) { return VEC(mode64, 15, 27, 27); } +ST_IN HReg hregMIPS_W24 ( Bool mode64 ) { return VEC(mode64, 17, 28, 28); } +ST_IN HReg hregMIPS_W25 ( Bool mode64 ) { return VEC(mode64, 19, 29, 29); } +ST_IN HReg hregMIPS_W26 ( Bool mode64 ) { return VEC(mode64, 21, 30, 30); } +ST_IN HReg hregMIPS_W27 ( Bool mode64 ) { return VEC(mode64, 23, 31, 31); } +ST_IN HReg hregMIPS_W28 ( Bool mode64 ) { return VEC(mode64, 25, 32, 32); } +ST_IN HReg hregMIPS_W29 ( Bool mode64 ) { return VEC(mode64, 27, 33, 33); } +ST_IN HReg hregMIPS_W30 ( Bool mode64 ) { return VEC(mode64, 29, 34, 34); } +ST_IN HReg hregMIPS_W31 ( Bool mode64 ) { return VEC(mode64, 31, 35, 35); } + // DRs are only allocatable in 32-bit mode, so the 64-bit index numbering // doesn't advance
here. ST_IN HReg hregMIPS_D0 ( Bool mode64 ) { vassert(!mode64); - return DR (mode64, 0, 0, 20); } + return DR (mode64, 0, 0, 36); } ST_IN HReg hregMIPS_D1 ( Bool mode64 ) { vassert(!mode64); - return DR (mode64, 2, 0, 21); } + return DR (mode64, 2, 0, 37); } ST_IN HReg hregMIPS_D2 ( Bool mode64 ) { vassert(!mode64); - return DR (mode64, 4, 0, 22); } + return DR (mode64, 4, 0, 38); } ST_IN HReg hregMIPS_D3 ( Bool mode64 ) { vassert(!mode64); - return DR (mode64, 6, 0, 23); } + return DR (mode64, 6, 0, 39); } ST_IN HReg hregMIPS_D4 ( Bool mode64 ) { vassert(!mode64); - return DR (mode64, 8, 0, 24); } + return DR (mode64, 8, 0, 40); } ST_IN HReg hregMIPS_D5 ( Bool mode64 ) { vassert(!mode64); - return DR (mode64, 10, 0, 25); } + return DR (mode64, 10, 0, 41); } ST_IN HReg hregMIPS_D6 ( Bool mode64 ) { vassert(!mode64); - return DR (mode64, 12, 0, 26); } + return DR (mode64, 12, 0, 42); } ST_IN HReg hregMIPS_D7 ( Bool mode64 ) { vassert(!mode64); - return DR (mode64, 14, 0, 27); } - -ST_IN HReg hregMIPS_HI ( Bool mode64 ) { return FR (mode64, 33, 20, 28); } -ST_IN HReg hregMIPS_LO ( Bool mode64 ) { return FR (mode64, 34, 21, 29); } - -ST_IN HReg hregMIPS_GPR0 ( Bool mode64 ) { return GPR(mode64, 0, 22, 30); } -ST_IN HReg hregMIPS_GPR1 ( Bool mode64 ) { return GPR(mode64, 1, 23, 31); } -ST_IN HReg hregMIPS_GPR2 ( Bool mode64 ) { return GPR(mode64, 2, 24, 32); } -ST_IN HReg hregMIPS_GPR3 ( Bool mode64 ) { return GPR(mode64, 3, 25, 33); } -ST_IN HReg hregMIPS_GPR4 ( Bool mode64 ) { return GPR(mode64, 4, 26, 34); } -ST_IN HReg hregMIPS_GPR5 ( Bool mode64 ) { return GPR(mode64, 5, 27, 35); } -ST_IN HReg hregMIPS_GPR6 ( Bool mode64 ) { return GPR(mode64, 6, 28, 36); } -ST_IN HReg hregMIPS_GPR7 ( Bool mode64 ) { return GPR(mode64, 7, 29, 37); } -ST_IN HReg hregMIPS_GPR8 ( Bool mode64 ) { return GPR(mode64, 8, 30, 38); } -ST_IN HReg hregMIPS_GPR9 ( Bool mode64 ) { return GPR(mode64, 9, 31, 39); } -ST_IN HReg hregMIPS_GPR10 ( Bool mode64 ) { return GPR(mode64, 10, 32, 40); } -ST_IN HReg hregMIPS_GPR11 ( Bool mode64 ) { return GPR(mode64, 11, 33, 41); } -ST_IN HReg hregMIPS_GPR23 ( Bool mode64 ) { return GPR(mode64, 23, 34, 42); } -ST_IN HReg hregMIPS_GPR25 ( Bool mode64 ) { return GPR(mode64, 25, 35, 43); } -ST_IN HReg hregMIPS_GPR29 ( Bool mode64 ) { return GPR(mode64, 29, 36, 44); } -ST_IN HReg hregMIPS_GPR31 ( Bool mode64 ) { return GPR(mode64, 31, 37, 45); } + return DR (mode64, 14, 0, 43); } + +ST_IN HReg hregMIPS_HI ( Bool mode64 ) { return FR (mode64, 33, 36, 44); } +ST_IN HReg hregMIPS_LO ( Bool mode64 ) { return FR (mode64, 34, 37, 45); } + +ST_IN HReg hregMIPS_GPR0 ( Bool mode64 ) { return GPR(mode64, 0, 38, 46); } +ST_IN HReg hregMIPS_GPR1 ( Bool mode64 ) { return GPR(mode64, 1, 39, 47); } +ST_IN HReg hregMIPS_GPR2 ( Bool mode64 ) { return GPR(mode64, 2, 40, 48); } +ST_IN HReg hregMIPS_GPR3 ( Bool mode64 ) { return GPR(mode64, 3, 41, 49); } +ST_IN HReg hregMIPS_GPR4 ( Bool mode64 ) { return GPR(mode64, 4, 42, 50); } +ST_IN HReg hregMIPS_GPR5 ( Bool mode64 ) { return GPR(mode64, 5, 43, 51); } +ST_IN HReg hregMIPS_GPR6 ( Bool mode64 ) { return GPR(mode64, 6, 44, 52); } +ST_IN HReg hregMIPS_GPR7 ( Bool mode64 ) { return GPR(mode64, 7, 45, 53); } +ST_IN HReg hregMIPS_GPR8 ( Bool mode64 ) { return GPR(mode64, 8, 46, 54); } +ST_IN HReg hregMIPS_GPR9 ( Bool mode64 ) { return GPR(mode64, 9, 47, 55); } +ST_IN HReg hregMIPS_GPR10 ( Bool mode64 ) { return GPR(mode64, 10, 48, 56); } +ST_IN HReg hregMIPS_GPR11 ( Bool mode64 ) { return GPR(mode64, 11, 49, 57); } +ST_IN HReg hregMIPS_GPR23 ( Bool mode64 ) 
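/* every universe index in this hunk is 16 higher than before: W16..W31 now occupy slots 20..35 in both modes, pushing the DRs, HI/LO and these GPRs up by 16 */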
{ return GPR(mode64, 23, 50, 58); } +ST_IN HReg hregMIPS_GPR25 ( Bool mode64 ) { return GPR(mode64, 25, 51, 59); } +ST_IN HReg hregMIPS_GPR29 ( Bool mode64 ) { return GPR(mode64, 29, 52, 60); } +ST_IN HReg hregMIPS_GPR31 ( Bool mode64 ) { return GPR(mode64, 31, 53, 61); } #undef ST_IN #undef GPR #undef FR #undef DR +#undef VEC #define GuestStatePointer(_mode64) hregMIPS_GPR23(_mode64) #define StackFramePointer(_mode64) hregMIPS_GPR30(_mode64) #define StackPointer(_mode64) hregMIPS_GPR29(_mode64) +#define Zero(_mode64) hregMIPS_GPR0(_mode64) /* guest_COND offset */ #define COND_OFFSET(_mode64) ((_mode64) ? 588 : 448) +/* guest_MSACSR offset */ +#define MSACSR_OFFSET(_mode64) ((_mode64) ? 1144 : 1016) + /* Num registers used for function calls */ #if defined(VGP_mips32_linux) /* a0, a1, a2, a3 */ @@ -137,6 +163,7 @@ ST_IN HReg hregMIPS_GPR31 ( Bool mode64 ) { return GPR(mode64, 31, 37, 45); } extern UInt ppHRegMIPS ( HReg, Bool ); +#define OPC_MSA 0x78000000 /* --------- Condition codes, Intel encoding. --------- */ typedef enum { @@ -270,6 +297,153 @@ typedef enum { extern const HChar *showMIPSMaccOp(MIPSMaccOp, Bool); /* --------- */ +typedef enum { + MSA_LD = 8, + MSA_ST = 9 +} MSAMI10Op; + +extern const HChar *showMsaMI10op(MSAMI10Op); + +typedef enum { + MSA_SLDI = 0, + MSA_COPY_S = 2, + MSA_COPY_U = 3, + MSA_INSERT = 4, + MSA_INSVE = 5, + MSA_MOVE = 0xBE, + MSA_CFCMSA = 0x7E, + MSA_CTCMSA = 0x3E +} MSAELMOp; + +extern const HChar *showMsaElmOp(MSAELMOp); + +typedef enum { + MSA_FILL = 0xC0, + MSA_PCNT = 0xC1, + MSA_NLOC = 0xC2, + MSA_NLZC = 0xC3 +} MSA2ROp; + +extern const HChar *showMsa2ROp(MSA2ROp); + +typedef enum { + MSA_FTRUNC_S = 0x191, + MSA_FTRUNC_U = 0x192, + MSA_FFINT_S = 0x19E, + MSA_FFINT_U = 0x19F, + MSA_FSQRT = 0x193, + MSA_FRSQRT = 0x194, + MSA_FRCP = 0x195, + MSA_FLOG2 = 0x197, + MSA_FEXUPR = 0x199, + MSA_FTINT_U = 0x19D, + MSA_FTINT_S = 0x19C, +} MSA2RFOp; + +extern const HChar *showMsa2RFOp(MSA2RFOp); + +typedef enum { + MSA_SLL = 0xD, + MSA_ADDV, + MSA_CEQ, + MSA_ADD_A, + MSA_SUBS_S, + MSA_SLD = 0x14, + MSA_SRA = 0x80000D, + MSA_SUBV, + MSA_SUBS_U = 0x800011, + MSA_SRL = 0x100000D, + MSA_MAX_S, + MSA_CLT_S, + MSA_ADDS_S, + MSA_PCKEV = 0x1000014, + MSA_MAX_U = 0x180000E, + MSA_CLT_U, + MSA_ADDS_U, + MSA_PCKOD = 0x1800014, + MSA_MIN_S = 0x200000E, + MSA_ILVL = 0x2000014, + MSA_MIN_U = 0x280000E, + MSA_ILVR = 0x2800014, + MSA_AVER_S = 0x3000010, + MSA_ILVEV = 0x3000014, + MSA_AVER_U = 0x3800010, + MSA_ILVOD = 0x3800014, + MSA_MULV = 0x0000012, + MSA_SPLAT = 0x0800014, + MSA_DIVS = 0x2000012, + MSA_DIVU = 0x2800012, + MSA_VSHF = 0x0000015, +} MSA3ROp; + +extern const HChar *showMsa3ROp(MSA3ROp); + +typedef enum { + MSA_FADD = 0x000001B, + MSA_FCUN = 0x040001A, + MSA_FSUB = 0x040001B, + MSA_FCEQ = 0x080001A, + MSA_FMUL = 0x080001B, + MSA_FDIV = 0x0C0001B, + MSA_FMADD = 0x100001B, + MSA_FCLT = 0x100001A, + MSA_FMSUB = 0x140001B, + MSA_FEXP2 = 0x1C0001B, + MSA_FMIN = 0x300001B, + MSA_FMIN_A = 0x340001B, + MSA_FMAX = 0x380001B, + MSA_MUL_Q = 0x100001C, + MSA_FCLE = 0x180001A, + MSA_FTQ = 0x280001B, + MSA_FEXDO = 0x200001B, + MSA_MULR_Q = 0x300001C, +} MSA3RFOp; + +extern const HChar *showMsa3RFOp(MSA3RFOp); + +typedef enum { + MSA_ANDV, + MSA_ORV, + MSA_NORV, + MSA_XORV +} MSAVECOp; + +extern const HChar *showMsaVecOp(MSAVECOp); + +typedef enum { + MSA_SLLI = 9, + MSA_SAT_S, + MSA_SRAI = 0x800009, + MSA_SRLI = 0x1000009, + MSA_SRARI = 0x100000A +} MSABITOp; + +extern const HChar *showMsaBitOp(MSABITOp); + +typedef enum { + MSA_B = 0, + MSA_H = 1, + MSA_W = 2, + MSA_D = 3, +} 
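/* lane-width selector for the 128-bit W registers: b = 16 x 8-bit, h = 8 x 16-bit, w = 4 x 32-bit, d = 2 x 64-bit lanes */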
MSADF; + +extern HChar showMsaDF(MSADF df); + +typedef enum { + MSA_DFN_B = 0x00, + MSA_DFN_H = 0x20, + MSA_DFN_W = 0x30, + MSA_DFN_D = 0x38, +} MSADFNMask; + +typedef enum { + MSA_F_WH = 0, + MSA_F_DW = 1, +} MSADFFlx; + +extern HChar showMsaDFF(MSADFFlx df, int op); + + /* ----- Instruction tags ----- */ typedef enum { Min_LI, /* load word (32/64-bit) immediate (fake insn) */ @@ -321,7 +495,16 @@ typedef enum { Min_FpCompare, /* FP compare, generating value into int reg */ Min_FpGpMove, /* Move from/to fpr to/from gpr */ - Min_MoveCond /* Move Conditional */ + Min_MoveCond, /* Move Conditional */ + + Msa_MI10, + Msa_ELM, + Msa_3R, + Msa_2R, + Msa_VEC, + Msa_BIT, + Msa_3RF, + Msa_2RF, } MIPSInstrTag; /* --------- */ @@ -498,6 +681,10 @@ typedef struct { HReg dst; MIPSAMode *src; } Load; + struct { + HReg data; + HReg addr; + } MsaLoad; /* 64/32/16/8 bit stores */ struct { UChar sz; /* 1|2|4|8 */ @@ -621,6 +808,58 @@ typedef struct { HReg src; HReg cond; } MoveCond; + struct { + MSAMI10Op op; + UInt s10; + HReg rs; + HReg wd; + MSADF df; + } MsaMi10; + struct { + MSAELMOp op; + HReg ws; + HReg wd; + UInt dfn; + } MsaElm; + struct { + MSA2ROp op; + MSADF df; + HReg ws; + HReg wd; + } Msa2R; + struct { + MSA3ROp op; + MSADF df; + HReg wt; + HReg ws; + HReg wd; + } Msa3R; + struct { + MSAVECOp op; + HReg wt; + HReg ws; + HReg wd; + } MsaVec; + struct { + MSABITOp op; + MSADF df; + UChar ms; + HReg ws; + HReg wd; + }MsaBit; + struct { + MSA3RFOp op; + MSADFFlx df; + HReg wt; + HReg ws; + HReg wd; + } Msa3RF; + struct { + MSA2RFOp op; + MSADFFlx df; + HReg ws; + HReg wd; + } Msa2RF; } Min; } MIPSInstr; @@ -695,6 +934,15 @@ extern MIPSInstr *MIPSInstr_EvCheck(MIPSAMode* amCounter, MIPSAMode* amFailAddr ); extern MIPSInstr *MIPSInstr_ProfInc( void ); +extern MIPSInstr* MIPSInstr_MsaMi10(MSAMI10Op op, UInt s10, HReg rs, HReg wd, MSADF df); +extern MIPSInstr* MIPSInstr_MsaElm(MSAELMOp op, HReg ws, HReg wd, UInt dfn); +extern MIPSInstr* MIPSInstr_Msa3R(MSA3ROp op, MSADF df, HReg wd, HReg ws, HReg wt); +extern MIPSInstr* MIPSInstr_Msa2R(MSA2ROp op, MSADF df, HReg ws, HReg wd); +extern MIPSInstr* MIPSInstr_MsaVec(MSAVECOp op, HReg wt, HReg ws, HReg wd); +extern MIPSInstr* MIPSInstr_MsaBit(MSABITOp op, MSADF df, UChar ms, HReg ws, HReg wd); +extern MIPSInstr* MIPSInstr_Msa3RF(MSA3RFOp op, MSADFFlx df, HReg wd, HReg ws, HReg wt); +extern MIPSInstr* MIPSInstr_Msa2RF(MSA2RFOp op, MSADFFlx df, HReg wd, HReg ws); + extern void ppMIPSInstr(const MIPSInstr *, Bool mode64); /* Some functions that insulate the register allocator from details @@ -754,6 +1002,7 @@ extern VexInvalRange patchProfInc_MIPS ( VexEndness endness_host, const ULong* location_of_counter, Bool mode64 ); + #endif /* ndef __VEX_HOST_MIPS_DEFS_H */ /*---------------------------------------------------------------*/ diff --git a/VEX/priv/host_mips_isel.c b/VEX/priv/host_mips_isel.c index 3d51919fd6..9e9bcb59ba 100644 --- a/VEX/priv/host_mips_isel.c +++ b/VEX/priv/host_mips_isel.c @@ -58,6 +58,9 @@ static Bool fp_mode64 = False; /* Host hwcaps */ static UInt hwcaps_host = 0; +/* Host CPU has MSA ASE */ +static Bool has_msa = False; + /* GPR register class for mips32/64 */ #define HRcGPR(_mode64) ((_mode64) ? 
HRcInt64 : HRcInt32) @@ -187,6 +190,13 @@ static HReg newVRegF(ISelEnv * env) return reg; } +static HReg newVRegV ( ISelEnv* env ) +{ + HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0, env->vreg_ctr); + env->vreg_ctr++; + return reg; +} + static void add_to_sp(ISelEnv * env, UInt n) { HReg sp = StackPointer(mode64); @@ -241,6 +251,11 @@ static MIPSRH *iselWordExpr_RH5u(ISelEnv * env, IRExpr * e); static MIPSRH *iselWordExpr_RH6u_wrk(ISelEnv * env, IRExpr * e); static MIPSRH *iselWordExpr_RH6u(ISelEnv * env, IRExpr * e); +/* Compute an I8 into a reg-or-7-bit-unsigned-immediate, the latter being an + immediate in the range 1 .. 127 inclusive. Used for doing shift amounts. */ +static MIPSRH *iselWordExpr_RH7u_wrk(ISelEnv * env, IRExpr * e); +static MIPSRH *iselWordExpr_RH7u(ISelEnv * env, IRExpr * e); + /* compute an I8/I16/I32 into a GPR*/ static HReg iselWordExpr_R_wrk(ISelEnv * env, IRExpr * e); static HReg iselWordExpr_R(ISelEnv * env, IRExpr * e); @@ -259,6 +274,9 @@ static void iselInt128Expr_wrk(HReg * rHi, HReg * rLo, ISelEnv * env, IRExpr * e); static void iselInt128Expr(HReg * rHi, HReg * rLo, ISelEnv * env, IRExpr * e); +static HReg iselV128Expr( ISelEnv* env, IRExpr* e ); +static HReg iselV128Expr_wrk( ISelEnv* env, IRExpr* e ); + static MIPSCondCode iselCondCode_wrk(ISelEnv * env, IRExpr * e); static MIPSCondCode iselCondCode(ISelEnv * env, IRExpr * e); @@ -300,6 +318,66 @@ static void set_MIPS_rounding_mode(ISelEnv * env, IRExpr * mode) addInstr(env, MIPSInstr_MtFCSR(tmp)); } +static void set_MIPS_rounding_mode_MSA(ISelEnv * env, IRExpr * mode) { + /* + rounding mode | MIPS | IR + ------------------------ + to nearest | 00 | 00 + to zero | 01 | 11 + to +infinity | 10 | 10 + to -infinity | 11 | 01 + */ + /* rm_MIPS32 = XOR(rm_IR , (rm_IR << 1)) & 3 */ + HReg irrm = iselWordExpr_R(env, mode); + HReg tmp = newVRegI(env); + HReg msacsr_old = newVRegI(env); + MIPSAMode *am_addr; + addInstr(env, MIPSInstr_Shft(Mshft_SLL, True, tmp, irrm, + MIPSRH_Imm(False, 1))); + addInstr(env, MIPSInstr_Alu(Malu_XOR, tmp, irrm, MIPSRH_Reg(tmp))); + addInstr(env, MIPSInstr_Alu(Malu_AND, tmp, tmp, MIPSRH_Imm(False, 3))); + /* save old value of MSACSR */ + addInstr(env, MIPSInstr_MsaElm(MSA_CFCMSA, hregMIPS_GPR0(mode64), msacsr_old, + MSA_DFN_W)); + sub_from_sp(env, 8); /* Move SP down 8 bytes */ + am_addr = MIPSAMode_IR(0, StackPointer(mode64)); + /* store old MSACSR to stack */ + addInstr(env, MIPSInstr_Store(4, am_addr, msacsr_old, mode64)); + /* set new value of MSACSR */ + addInstr(env, MIPSInstr_MsaElm(MSA_CTCMSA, tmp, hregMIPS_GPR0(mode64), + MSA_DFN_W)); +} + + +static void set_guest_MIPS_rounding_mode_MSA(ISelEnv * env) { + /* + rounding mode | MIPS | IR + ------------------------ + to nearest | 00 | 00 + to zero | 01 | 11 + to +infinity | 10 | 10 + to -infinity | 11 | 01 + */ + /* rm_MIPS32 = XOR(rm_IR , (rm_IR << 1)) & 3 */ + HReg irrm = newVRegI(env); + HReg msacsr_old = newVRegI(env); + MIPSAMode *am_addr; + MIPSAMode *rm_addr = MIPSAMode_IR(MSACSR_OFFSET(mode64), + GuestStatePointer(mode64)); + addInstr(env, MIPSInstr_Load(4, irrm, rm_addr, mode64)); + /* save old value of MSACSR */ + addInstr(env, MIPSInstr_MsaElm(MSA_CFCMSA, hregMIPS_GPR0(mode64), msacsr_old, + MSA_DFN_W)); + sub_from_sp(env, 8); /* Move SP down 8 bytes */ + am_addr = MIPSAMode_IR(0, StackPointer(mode64)); + /* store old MSACSR to stack */ + addInstr(env, MIPSInstr_Store(4, am_addr, msacsr_old, mode64)); + /* set new value of MSACSR */ + addInstr(env, MIPSInstr_MsaElm(MSA_CTCMSA, irrm, hregMIPS_GPR0(mode64), + 
MSA_DFN_W)); +} + + static void set_MIPS_rounding_default(ISelEnv * env) { HReg fcsr = newVRegI(env); @@ -315,6 +393,18 @@ static void set_MIPS_rounding_default(ISelEnv * env) addInstr(env, MIPSInstr_MtFCSR(fcsr)); } +static void set_MIPS_rounding_default_MSA(ISelEnv * env) { + HReg msacsr = newVRegI(env); + /* load as float */ + MIPSAMode *am_addr; + am_addr = MIPSAMode_IR(0, StackPointer(mode64)); + addInstr(env, MIPSInstr_Load(4, msacsr, am_addr, mode64)); + add_to_sp(env, 8); /* Reset SP */ + /* set new value of FCSR*/ + addInstr(env, MIPSInstr_MsaElm(MSA_CTCMSA, msacsr, hregMIPS_GPR0(mode64), + MSA_DFN_W)); +} + /*---------------------------------------------------------*/ /*--- ISEL: Misc helpers ---*/ /*---------------------------------------------------------*/ @@ -515,7 +605,7 @@ static void doHelperCall(/*OUT*/UInt* stackAdjustAfterCall, if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) aTy = typeOfIRExpr(env->type_env, arg); - if (aTy == Ity_I32 || mode64) { + if (aTy == Ity_I32 || (mode64 && aTy != Ity_INVALID)) { argiregs |= (1 << (argreg + 4)); addInstr(env, mk_iMOVds_RR(argregs[argreg], iselWordExpr_R(env, arg))); @@ -556,7 +646,7 @@ static void doHelperCall(/*OUT*/UInt* stackAdjustAfterCall, if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) aTy = typeOfIRExpr(env->type_env, arg); - if (aTy == Ity_I32 || (mode64 && arg->tag != Iex_GSPTR)) { + if (aTy == Ity_I32 || (mode64 && aTy != Ity_INVALID)) { tmpregs[argreg] = iselWordExpr_R(env, arg); argreg++; } else if (aTy == Ity_I64) { /* Ity_I64 */ @@ -575,8 +665,8 @@ static void doHelperCall(/*OUT*/UInt* stackAdjustAfterCall, argreg++; } else if (arg->tag == Iex_VECRET) { - // If this happens, it denotes ill-formed IR - vassert(0); + tmpregs[argreg++] = StackPointer(mode64); + sub_from_sp(env, 16); /* Move SP down 16 bytes */ } } @@ -623,7 +713,6 @@ static void doHelperCall(/*OUT*/UInt* stackAdjustAfterCall, *retloc = mk_RetLoc_simple(RLPri_Int); break; case Ity_V128: - vassert(0); // ATC *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0); *stackAdjustAfterCall = 16; break; @@ -957,8 +1046,22 @@ static HReg iselWordExpr_R_wrk(ISelEnv * env, IRExpr * e) return r_dst; } + if (!mode64 && (e->Iex.Binop.op == Iop_CasCmpEQ64 + || e->Iex.Binop.op == Iop_CmpEQ64)) { + HReg tmp1, tmp2, tmp3, tmp4; + HReg dst1 = newVRegI(env); + HReg dst2 = newVRegI(env); + iselInt64Expr(&tmp1, &tmp2, env, e->Iex.Binop.arg1); + iselInt64Expr(&tmp3, &tmp4, env, e->Iex.Binop.arg2); + addInstr(env, MIPSInstr_Cmp(False, True, dst1, tmp1, tmp3, MIPScc_EQ)); + addInstr(env, MIPSInstr_Cmp(False, True, dst2, tmp2, tmp4, MIPScc_EQ)); + addInstr(env, MIPSInstr_Alu(Malu_AND, dst1, dst1, MIPSRH_Reg(dst2))); + return dst1; + } + /* Cmp*32*(x,y) ? 
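(And for the CasCmpEQ64/CmpEQ64 case just above: on a 32-bit host a 64-bit equality is lowered to two 32-bit compares whose results are ANDed together, i.e. dst = (xHi == yHi) & (xLo == yLo).)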
*/ if (e->Iex.Binop.op == Iop_CmpEQ32 + || e->Iex.Binop.op == Iop_CmpEQ8 || e->Iex.Binop.op == Iop_CmpEQ16 || e->Iex.Binop.op == Iop_CmpNE32 || e->Iex.Binop.op == Iop_CmpNE64 @@ -990,6 +1093,7 @@ static HReg iselWordExpr_R_wrk(ISelEnv * env, IRExpr * e) cc = MIPScc_EQ; size32 = True; break; + case Iop_CmpEQ8: case Iop_CmpEQ16: cc = MIPScc_EQ; size32 = True; @@ -1100,6 +1204,38 @@ static HReg iselWordExpr_R_wrk(ISelEnv * env, IRExpr * e) return r_tmpR; } + if (e->Iex.Binop.op == Iop_MullU8 || + e->Iex.Binop.op == Iop_MullS8 || + e->Iex.Binop.op == Iop_MullU16 || + e->Iex.Binop.op == Iop_MullS16) { + Bool syned = toBool((e->Iex.Binop.op == Iop_MullS8) || + (e->Iex.Binop.op == Iop_MullS16)); + HReg r_dst = newVRegI(env); + HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1); + HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2); + if (syned) { + Int no_bits = (e->Iex.Binop.op == Iop_MullS16) ? 16 : 24; + /* Sign-extend the 8/16-bit args into fresh temps; the registers + returned by iselWordExpr_R must not be modified. */ + HReg r_tmpL = newVRegI(env); + HReg r_tmpR = newVRegI(env); + addInstr(env, MIPSInstr_Shft(Mshft_SLL, True, + r_tmpL, r_srcL, + MIPSRH_Imm(False, no_bits))); + addInstr(env, MIPSInstr_Shft(Mshft_SRA, True, + r_tmpL, r_tmpL, + MIPSRH_Imm(False, no_bits))); + addInstr(env, MIPSInstr_Shft(Mshft_SLL, True, + r_tmpR, r_srcR, + MIPSRH_Imm(False, no_bits))); + addInstr(env, MIPSInstr_Shft(Mshft_SRA, True, + r_tmpR, r_tmpR, + MIPSRH_Imm(False, no_bits))); + addInstr(env, MIPSInstr_Mul(r_dst, r_tmpL, r_tmpR)); + + } else { + addInstr(env, MIPSInstr_Mult(syned, r_srcL, r_srcR)); + addInstr(env, MIPSInstr_Mflo(r_dst)); + } + return r_dst; + } + if (e->Iex.Binop.op == Iop_CmpF64) { HReg r_srcL, r_srcR; if (mode64) { @@ -1214,6 +1350,22 @@ static HReg iselWordExpr_R_wrk(ISelEnv * env, IRExpr * e) return r_dst; } + if (e->Iex.Binop.op == Iop_DivS32 || + e->Iex.Binop.op == Iop_DivU32 || + (e->Iex.Binop.op == Iop_DivS64 && mode64) || + (e->Iex.Binop.op == Iop_DivU64 && mode64)) { + HReg r_dst = newVRegI(env); + Bool syned = toBool(e->Iex.Binop.op == Iop_DivS32 || + e->Iex.Binop.op == Iop_DivS64); + Bool div32 = toBool(e->Iex.Binop.op == Iop_DivS32 || + e->Iex.Binop.op == Iop_DivU32); + HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2); + HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1); + addInstr(env, MIPSInstr_Div(syned, div32, r_srcL, r_srcR)); + addInstr(env, MIPSInstr_Mflo(r_dst)); + return r_dst; + } + if (e->Iex.Binop.op == Iop_8HLto16 || e->Iex.Binop.op == Iop_16HLto32) { HReg tHi = iselWordExpr_R(env, e->Iex.Binop.arg1); @@ -1308,6 +1460,163 @@ static HReg iselWordExpr_R_wrk(ISelEnv * env, IRExpr * e) return r_dst; } + if (e->Iex.Binop.op == Iop_F32toI32U) { + HReg valF = iselFltExpr(env, e->Iex.Binop.arg2); + HReg tmpD = newVRegD(env); + HReg r_dst = newVRegI(env); + MIPSAMode *am_addr; + + /* CVTLS tmpD, valF */ + set_MIPS_rounding_mode(env, e->Iex.Binop.arg1); + addInstr(env, MIPSInstr_FpConvert(Mfp_CVTLS, tmpD, valF)); + set_MIPS_rounding_default(env); + + sub_from_sp(env, 16); /* Move SP down 16 bytes */ + am_addr = MIPSAMode_IR(0, StackPointer(mode64)); + + /* store as F64 */ + addInstr(env, MIPSInstr_FpLdSt(False /*store */ , 8, tmpD, + am_addr)); + /* load as 2xI32 */ +#if defined (_MIPSEL) + addInstr(env, MIPSInstr_Load(4, r_dst, am_addr, mode64)); +#elif defined (_MIPSEB) + addInstr(env, MIPSInstr_Load(4, r_dst, nextMIPSAModeFloat(am_addr), + mode64)); +#endif + + /* Reset SP */ + add_to_sp(env, 16); + + return r_dst; + } + + if (e->Iex.Binop.op == Iop_F64toI64U) { + HReg r_src; + HReg tmp = newVRegV(env); + vassert(has_msa); + r_src = iselFltExpr( env, e->Iex.Binop.arg2); + set_MIPS_rounding_mode_MSA(env, e->Iex.Binop.arg1); + addInstr(env,
MIPSInstr_Msa2RF(MSA_FTINT_U, MSA_F_DW, tmp, r_src)); + HReg r_dst = newVRegI(env); + addInstr(env, + MIPSInstr_MsaElm(MSA_COPY_S, tmp, r_dst, MSA_DFN_D | 0)); + set_MIPS_rounding_default_MSA(env); + return r_dst; + } + + if (e->Iex.Binop.op == Iop_GetElem8x16) { + HReg v_src = iselV128Expr(env, e->Iex.Binop.arg1); + HReg r_dst = newVRegI(env); + MIPSRH *tmp = iselWordExpr_RH(env, False, e->Iex.Binop.arg2); + vassert(has_msa); + switch (tmp->tag) { + case Mrh_Imm: + addInstr(env, + MIPSInstr_MsaElm(MSA_COPY_U, v_src, r_dst, + MSA_DFN_B | + (tmp->Mrh.Imm.imm16 & 0x0f))); + break; + + case Mrh_Reg: { + HReg v_tmp = newVRegV(env); + addInstr(env, + MIPSInstr_Msa3R(MSA_SPLAT, MSA_B, v_tmp, v_src, + tmp->Mrh.Reg.reg)); + addInstr(env, + MIPSInstr_MsaElm(MSA_COPY_U, v_tmp, r_dst, + MSA_DFN_B)); + break; + } + } + + return r_dst; + } + + + if (e->Iex.Binop.op == Iop_GetElem16x8) { + HReg v_src = iselV128Expr(env, e->Iex.Binop.arg1); + HReg r_dst = newVRegI(env); + MIPSRH *tmp = iselWordExpr_RH(env, False, e->Iex.Binop.arg2); + vassert(has_msa); + switch (tmp->tag) { + case Mrh_Imm: + addInstr(env, + MIPSInstr_MsaElm(MSA_COPY_U, v_src, r_dst, + MSA_DFN_H | + (tmp->Mrh.Imm.imm16 & 0x07))); + break; + + case Mrh_Reg: { + HReg v_tmp = newVRegV(env); + addInstr(env, + MIPSInstr_Msa3R(MSA_SPLAT, MSA_H, v_tmp, v_src, + tmp->Mrh.Reg.reg)); + addInstr(env, + MIPSInstr_MsaElm(MSA_COPY_U, v_tmp, r_dst, + MSA_DFN_H)); + break; + } + } + + return r_dst; + } + + if (e->Iex.Binop.op == Iop_GetElem32x4) { + HReg v_src = iselV128Expr(env, e->Iex.Binop.arg1); + HReg r_dst = newVRegI(env); + MIPSRH *tmp = iselWordExpr_RH(env, False, e->Iex.Binop.arg2); + vassert(has_msa); + switch (tmp->tag) { + case Mrh_Imm: + addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, v_src, r_dst, + MSA_DFN_W | + (tmp->Mrh.Imm.imm16 & 0x03))); + break; + + case Mrh_Reg: { + HReg v_tmp = newVRegV(env); + addInstr(env, + MIPSInstr_Msa3R(MSA_SPLAT, MSA_W, v_tmp, v_src, + tmp->Mrh.Reg.reg)); + addInstr(env, + MIPSInstr_MsaElm(MSA_COPY_S, v_tmp, r_dst, + MSA_DFN_W)); + break; + } + } + + return r_dst; + } + if (e->Iex.Binop.op == Iop_GetElem64x2) { + vassert(mode64); + HReg v_src = iselV128Expr(env, e->Iex.Binop.arg1); + HReg r_dst = newVRegI(env); + MIPSRH *tmp = iselWordExpr_RH(env, False, e->Iex.Binop.arg2); + vassert(has_msa); + switch (tmp->tag) { + case Mrh_Imm: + addInstr(env, + MIPSInstr_MsaElm(MSA_COPY_S, v_src, r_dst, + MSA_DFN_D | + (tmp->Mrh.Imm.imm16 & 0x01))); + break; + + case Mrh_Reg: { + HReg v_tmp = newVRegV(env); + addInstr(env, + MIPSInstr_Msa3R(MSA_SPLAT, MSA_D, v_tmp, v_src, + tmp->Mrh.Reg.reg)); + addInstr(env, + MIPSInstr_MsaElm(MSA_COPY_S, v_tmp, r_dst, + MSA_DFN_D)); + break; + } + } + + return r_dst; + } + /* -------- DSP ASE -------- */ /* All used cases involving host-side helper calls. */ void* fn = NULL; @@ -1502,11 +1811,15 @@ static HReg iselWordExpr_R_wrk(ISelEnv * env, IRExpr * e) case Iop_64to1: case Iop_64to8: { - vassert(mode64); HReg r_src, r_dst; UShort mask = (op_unop == Iop_64to1) ? 0x1 : 0xFF; r_dst = newVRegI(env); - r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + if (mode64) + r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + else { + HReg tmp; + iselInt64Expr(&tmp, &r_src, env, e->Iex.Unop.arg); + } addInstr(env, MIPSInstr_Alu(Malu_AND, r_dst, r_src, MIPSRH_Imm(False, mask))); return r_dst; @@ -1720,6 +2033,52 @@ static HReg iselWordExpr_R_wrk(ISelEnv * env, IRExpr * e) return rLo; /* and abandon rLo .. 
poor wee thing :-) */ } + case Iop_V128to32: { + HReg i_dst = newVRegI(env); + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + vassert(has_msa); + addInstr(env, + MIPSInstr_MsaElm(MSA_COPY_S, v_src, i_dst, MSA_DFN_W)); + return i_dst; + } + + case Iop_V128HIto64: { + vassert(mode64); + vassert(has_msa); + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg reg = newVRegI(env); + addInstr(env, + MIPSInstr_MsaElm(MSA_COPY_S, v_src, reg, MSA_DFN_D | 1)); + return reg; + } + + case Iop_V128to64: { + vassert(mode64); + vassert(has_msa); + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg reg = newVRegI(env); + addInstr(env, + MIPSInstr_MsaElm(MSA_COPY_S, v_src, reg, MSA_DFN_D | 0)); + return reg; + } + + case Iop_F32toF16x4: { + vassert(mode64); + vassert(has_msa); + HReg v_arg = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_src = newVRegV(env); + set_guest_MIPS_rounding_mode_MSA(env); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FEXDO, MSA_F_WH, + v_src, v_arg, v_arg)); + set_MIPS_rounding_default_MSA(env); + HReg reg = newVRegI(env); + addInstr(env, + MIPSInstr_MsaElm(MSA_COPY_S, v_src, reg, MSA_DFN_D | 0)); + return reg; + } + + default: break; } @@ -1981,7 +2340,6 @@ static MIPSRH *iselWordExpr_RH5u_wrk(ISelEnv * env, IRExpr * e) /* --------------------- RH6u --------------------- */ -/* Only used in 64-bit mode. */ static MIPSRH *iselWordExpr_RH6u ( ISelEnv * env, IRExpr * e ) { MIPSRH *ri; @@ -1997,7 +2355,7 @@ static MIPSRH *iselWordExpr_RH6u ( ISelEnv * env, IRExpr * e ) vassert(hregIsVirtual(ri->Mrh.Reg.reg)); return ri; default: - vpanic("iselIntExpr_RH6u: unknown mips64 RI tag"); + vpanic("iselIntExpr_RH6u: unknown RI tag"); } } @@ -2019,6 +2377,46 @@ static MIPSRH *iselWordExpr_RH6u_wrk ( ISelEnv * env, IRExpr * e ) /* default case: calculate into a register and return that */ return MIPSRH_Reg(iselWordExpr_R(env, e)); } +/* --------------------- RH7u --------------------- */ + +static MIPSRH *iselWordExpr_RH7u ( ISelEnv * env, IRExpr * e ) +{ + MIPSRH *ri; + ri = iselWordExpr_RH7u_wrk(env, e); + /* sanity checks ... */ + switch (ri->tag) { + case Mrh_Imm: + vassert(ri->Mrh.Imm.imm16 >= 1 && ri->Mrh.Imm.imm16 <= 127); + vassert(!ri->Mrh.Imm.syned); + return ri; + case Mrh_Reg: + vassert(hregClass(ri->Mrh.Reg.reg) == HRcGPR(env->mode64)); + vassert(hregIsVirtual(ri->Mrh.Reg.reg)); + return ri; + default: + vpanic("iselIntExpr_RH7u: unknown RI tag"); + } +} + +/* DO NOT CALL THIS DIRECTLY ! */ +static MIPSRH *iselWordExpr_RH7u_wrk ( ISelEnv * env, IRExpr * e ) +{ + IRType ty = typeOfIRExpr(env->type_env, e); + vassert(ty == Ity_I8); + + /* special case: immediate */ + if (e->tag == Iex_Const + && e->Iex.Const.con->tag == Ico_U8 + && e->Iex.Const.con->Ico.U8 >= 1 && e->Iex.Const.con->Ico.U8 <= 127) + { + return MIPSRH_Imm(False /*unsigned */ , + e->Iex.Const.con->Ico.U8); + } + + /* default case: calculate into a register and return that */ + return MIPSRH_Reg(iselWordExpr_R(env, e)); +} + /* --------------------- CONDCODE --------------------- */ @@ -2155,78 +2553,2123 @@ static MIPSCondCode iselCondCode_wrk(ISelEnv * env, IRExpr * e) } /*---------------------------------------------------------*/ -/*--- ISEL: Integer expressions (128 bit) ---*/ +/*--- ISEL: Vector expressions (128 bit - SIMD) ---*/ /*---------------------------------------------------------*/ -/* 64-bit mode ONLY: compute a 128-bit value into a register pair, - which is returned as the first two parameters. 
As with - iselWordExpr_R, these may be either real or virtual regs; in any - case they must not be changed by subsequent code emitted by the - caller. */ - -static void iselInt128Expr(HReg * rHi, HReg * rLo, ISelEnv * env, IRExpr * e) -{ - vassert(env->mode64); - iselInt128Expr_wrk(rHi, rLo, env, e); - vassert(hregClass(*rHi) == HRcGPR(env->mode64)); - vassert(hregIsVirtual(*rHi)); - vassert(hregClass(*rLo) == HRcGPR(env->mode64)); - vassert(hregIsVirtual(*rLo)); +/* Compute a vector value into vector register. */ +static HReg iselV128Expr (ISelEnv* env, IRExpr* e) { + vassert(has_msa); + HReg r = iselV128Expr_wrk(env, e); + vassert(hregClass(r) == HRcVec128); + vassert(hregIsVirtual(r)); + return r; } /* DO NOT CALL THIS DIRECTLY ! */ -static void iselInt128Expr_wrk(HReg * rHi, HReg * rLo, ISelEnv * env, - IRExpr * e) -{ +static HReg iselV128Expr_wrk(ISelEnv* env, IRExpr* e) { + IRType ty = typeOfIRExpr(env->type_env, e); vassert(e); - vassert(typeOfIRExpr(env->type_env, e) == Ity_I128); + vassert(ty == Ity_V128); - /* read 128-bit IRTemp */ if (e->tag == Iex_RdTmp) { - lookupIRTempPair(rHi, rLo, env, e->Iex.RdTmp.tmp); - return; + return lookupIRTemp(env, e->Iex.RdTmp.tmp); } - /* --------- BINARY ops --------- */ - if (e->tag == Iex_Binop) { - switch (e->Iex.Binop.op) { - /* 64 x 64 -> 128 multiply */ - case Iop_MullU64: - case Iop_MullS64: { - HReg tLo = newVRegI(env); - HReg tHi = newVRegI(env); - Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64); - HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1); - HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2); - addInstr(env, MIPSInstr_Mult(syned, r_srcL, r_srcR)); - addInstr(env, MIPSInstr_Mfhi(tHi)); - addInstr(env, MIPSInstr_Mflo(tLo)); - *rHi = tHi; - *rLo = tLo; - return; - } + if (e->tag == Iex_Load) { + vassert (e->Iex.Load.ty == Ity_V128); + HReg v_dst = newVRegV(env); + addInstr(env, MIPSInstr_MsaMi10(MSA_LD, 0, iselWordExpr_R(env, + e->Iex.Load.addr), v_dst, MSA_B)); + return v_dst; + } - /* 64HLto128(e1,e2) */ - case Iop_64HLto128: - *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1); - *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2); - return; + if (e->tag == Iex_Get) { + HReg v_dst = newVRegV(env); +#if defined(_MIPSEB) + HReg r_addr = newVRegI(env); + addInstr(env, MIPSInstr_Alu(mode64 ? 
Malu_DADD : Malu_ADD, r_addr, GuestStatePointer(mode64), + MIPSRH_Imm(False, e->Iex.Get.offset))); + addInstr(env, MIPSInstr_MsaMi10(MSA_LD, 0, r_addr, v_dst, MSA_B)); +#else + vassert(!(e->Iex.Get.offset & 7)); + addInstr(env, MIPSInstr_MsaMi10(MSA_LD, e->Iex.Get.offset >> 3, + GuestStatePointer(mode64), v_dst, MSA_D)); +#endif + return v_dst; + } - case Iop_DivModU64to64: - case Iop_DivModS64to64: { - HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1); - HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2); - HReg tLo = newVRegI(env); - HReg tHi = newVRegI(env); - Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to64); + if (e->tag == Iex_Unop) { + IROp op_unop = e->Iex.Unop.op; - addInstr(env, MIPSInstr_Div(syned, False, r_srcL, r_srcR)); - addInstr(env, MIPSInstr_Mfhi(tHi)); - addInstr(env, MIPSInstr_Mflo(tLo)); - *rHi = tHi; - *rLo = tLo; - return; - } + switch (op_unop) { + case Iop_Abs64x2: { + HReg v_dst = newVRegV(env); + HReg v_help = newVRegV(env); + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + addInstr(env, + MIPSInstr_Msa3R(MSA_SUBV, MSA_D, v_help, v_src, v_src)); + addInstr(env, + MIPSInstr_Msa3R(MSA_ADD_A, MSA_D, + v_dst, v_src, v_help)); + return v_dst; + } + + case Iop_Abs32x4: { + HReg v_dst = newVRegV(env); + HReg v_help = newVRegV(env); + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + addInstr(env, + MIPSInstr_Msa3R(MSA_SUBV, MSA_W, v_help, v_src, v_src)); + addInstr(env, + MIPSInstr_Msa3R(MSA_ADD_A, MSA_W, + v_dst, v_src, v_help)); + return v_dst; + } + + case Iop_Abs16x8: { + HReg v_dst = newVRegV(env); + HReg v_help = newVRegV(env); + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + addInstr(env, + MIPSInstr_Msa3R(MSA_SUBV, MSA_H, v_help, v_src, v_src)); + addInstr(env, + MIPSInstr_Msa3R(MSA_ADD_A, MSA_H, + v_dst, v_src, v_help)); + return v_dst; + } + + case Iop_Abs8x16: { + HReg v_dst = newVRegV(env); + HReg v_help = newVRegV(env); + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + addInstr(env, + MIPSInstr_Msa3R(MSA_SUBV, MSA_B, v_help, v_src, v_src)); + addInstr(env, + MIPSInstr_Msa3R(MSA_ADD_A, MSA_B, + v_dst, v_src, v_help)); + return v_dst; + } + + case Iop_Cnt8x16: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg res = newVRegV(env); + addInstr(env, MIPSInstr_Msa2R(MSA_PCNT, MSA_B, v_src, res)); + return res; + } + + case Iop_NotV128: { + HReg v_dst = newVRegV(env); + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + addInstr(env, MIPSInstr_MsaVec(MSA_NORV, v_dst, v_src, v_src)); + return v_dst; + } + + case Iop_Reverse8sIn16_x8: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_tmp = newVRegV(env); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVEV, MSA_B, v_tmp, v_src, v_src)); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVOD, MSA_B, v_src, v_tmp, v_src)); + return v_src; + } + + case Iop_Reverse8sIn32_x4: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_tmp = newVRegV(env); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVEV, MSA_H, v_tmp, v_src, v_src)); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVOD, MSA_H, v_src, v_tmp, v_src)); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVEV, MSA_B, v_tmp, v_src, v_src)); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVOD, MSA_B, v_src, v_tmp, v_src)); + return v_src; + } + + case Iop_Reverse8sIn64_x2: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_tmp = newVRegV(env); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVEV, MSA_W, v_tmp, v_src, v_src)); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVOD, MSA_W, v_src, v_tmp, v_src)); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVEV, MSA_H, v_tmp, v_src, 
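/* Byte reversal built purely from interleaves: each ILVEV/ILVOD pair
   swaps adjacent elements of the given width, so the W, H and B passes
   together reverse all eight bytes of each doubleword.  Equivalent
   scalar code for one lane:

       uint64_t rev8in64(uint64_t x) {
           x = (x << 32) | (x >> 32);                    // W pass
           x = ((x & 0x0000FFFF0000FFFFULL) << 16)
             | ((x >> 16) & 0x0000FFFF0000FFFFULL);      // H pass
           x = ((x & 0x00FF00FF00FF00FFULL) <<  8)
             | ((x >>  8) & 0x00FF00FF00FF00FFULL);      // B pass
           return x;
       }
*/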
v_src)); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVOD, MSA_H, v_src, v_tmp, v_src)); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVEV, MSA_B, v_tmp, v_src, v_src)); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVOD, MSA_B, v_src, v_tmp, v_src)); + return v_src; + } + + case Iop_Cls8x16: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + addInstr(env, MIPSInstr_Msa2R(MSA_NLOC, MSA_B, v_src, v_dst)); + return v_dst; + } + + case Iop_Cls16x8: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + addInstr(env, MIPSInstr_Msa2R(MSA_NLOC, MSA_H, v_src, v_dst)); + return v_dst; + } + + case Iop_Cls32x4: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + addInstr(env, MIPSInstr_Msa2R(MSA_NLOC, MSA_W, v_src, v_dst)); + return v_dst; + } + + case Iop_Clz8x16: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + addInstr(env, MIPSInstr_Msa2R(MSA_NLZC, MSA_B, v_src, v_dst)); + return v_dst; + } + + case Iop_Clz16x8: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + addInstr(env, MIPSInstr_Msa2R(MSA_NLZC, MSA_H, v_src, v_dst)); + return v_dst; + } + + case Iop_Clz32x4: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + addInstr(env, MIPSInstr_Msa2R(MSA_NLZC, MSA_W, v_src, v_dst)); + return v_dst; + } + + case Iop_Clz64x2: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + addInstr(env, MIPSInstr_Msa2R(MSA_NLZC, MSA_D, v_src, v_dst)); + return v_dst; + } + + case Iop_Abs32Fx4: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + HReg v_help = newVRegV(env); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FMUL, MSA_F_WH, + v_help, v_src, v_src)); + addInstr(env, + MIPSInstr_Msa2RF(MSA_FSQRT, MSA_F_WH, v_dst, v_help)); + return v_dst; + } + + case Iop_Abs64Fx2: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + HReg v_help = newVRegV(env); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FMUL, MSA_F_DW, + v_help, v_src, v_src)); + addInstr(env, + MIPSInstr_Msa2RF(MSA_FSQRT, MSA_F_DW, v_dst, v_help)); + return v_dst; + } + + case Iop_RecipEst32Fx4: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + set_guest_MIPS_rounding_mode_MSA(env); + addInstr(env, + MIPSInstr_Msa2RF(MSA_FRCP, MSA_F_WH, v_dst, v_src)); + set_MIPS_rounding_default_MSA(env); + return v_dst; + } + + case Iop_RecipEst64Fx2: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + set_guest_MIPS_rounding_mode_MSA(env); + addInstr(env, + MIPSInstr_Msa2RF(MSA_FRCP, MSA_F_DW, v_dst, v_src)); + set_MIPS_rounding_default_MSA(env); + return v_dst; + } + + case Iop_RSqrtEst32Fx4: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + set_guest_MIPS_rounding_mode_MSA(env); + addInstr(env, + MIPSInstr_Msa2RF(MSA_FRSQRT, MSA_F_WH, v_dst, v_src)); + set_MIPS_rounding_default_MSA(env); + return v_dst; + } + + case Iop_RSqrtEst64Fx2: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + set_guest_MIPS_rounding_mode_MSA(env); + addInstr(env, + MIPSInstr_Msa2RF(MSA_FRSQRT, MSA_F_DW, v_dst, v_src)); + set_MIPS_rounding_default_MSA(env); + return v_dst; + } + + case Iop_F16toF32x4: { + HReg v_dst = newVRegV(env); + + if (mode64) { + HReg r_src; + r_src = iselWordExpr_R(env, e->Iex.Unop.arg); + addInstr(env, + MIPSInstr_Msa2R(MSA_FILL, MSA_D, r_src, v_dst)); + addInstr(env, 
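/* F16toF32x4: the four half-precision inputs arrive packed in one
   64-bit scalar.  FILL.D replicates it into both doubleword lanes (the
   INSERT into lane 1 here rewrites a value FILL.D has already placed),
   and FEXUPR.W then widens the four binary16 values in the low half of
   the vector to binary32.  Per-lane effect:

       dst_f32[i] = (float)src_f16[i];   // i = 0..3, done by FEXUPR.W
*/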
+ MIPSInstr_MsaElm(MSA_INSERT, r_src, v_dst, + MSA_DFN_D | 1)); + } else { + HReg r_srch, r_srcl; + iselInt64Expr(&r_srch, &r_srcl, env, e->Iex.Unop.arg); + addInstr(env, + MIPSInstr_Msa2R(MSA_FILL, MSA_W, r_srcl, v_dst)); + addInstr(env, + MIPSInstr_MsaElm(MSA_INSERT, r_srch, v_dst, + MSA_DFN_W | 1)); + addInstr(env, + MIPSInstr_MsaElm(MSA_INSERT, r_srcl, v_dst, + MSA_DFN_W | 2)); + addInstr(env, + MIPSInstr_MsaElm(MSA_INSERT, r_srch, v_dst, + MSA_DFN_W | 3)); + } + + addInstr(env, + MIPSInstr_Msa2RF(MSA_FEXUPR, MSA_F_WH, v_dst, v_dst)); + return v_dst; + } + + case Iop_I32UtoFx4: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + set_guest_MIPS_rounding_mode_MSA(env); + addInstr(env, + MIPSInstr_Msa2RF(MSA_FFINT_U, MSA_F_WH, v_dst, v_src)); + set_MIPS_rounding_default_MSA(env); + return v_dst; + } + + case Iop_FtoI32Sx4_RZ: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + addInstr(env, + MIPSInstr_Msa2RF(MSA_FTRUNC_S, MSA_F_WH, v_dst, v_src)); + return v_dst; + } + + case Iop_FtoI32Ux4_RZ: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + addInstr(env, + MIPSInstr_Msa2RF(MSA_FTRUNC_U, MSA_F_WH, v_dst, v_src)); + return v_dst; + } + + case Iop_Log2_32Fx4: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + addInstr(env, + MIPSInstr_Msa2RF(MSA_FLOG2, MSA_F_WH, v_dst, v_src)); + return v_dst; + } + + case Iop_Log2_64Fx2: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + addInstr(env, + MIPSInstr_Msa2RF(MSA_FLOG2, MSA_F_DW, v_dst, v_src)); + return v_dst; + } + case Iop_CmpNEZ8x16: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + HReg zero = Zero(mode64); + addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, zero, v_dst)); + addInstr(env, + MIPSInstr_Msa3R(MSA_CEQ, MSA_B, v_dst, v_src, v_dst)); + addInstr(env, MIPSInstr_MsaVec(MSA_NORV, v_dst, v_dst, v_dst)); + return v_dst; + } + case Iop_CmpNEZ16x8: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + HReg zero = Zero(mode64); + addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, zero, v_dst)); + addInstr(env, + MIPSInstr_Msa3R(MSA_CEQ, MSA_H, v_dst, v_src, v_dst)); + addInstr(env, MIPSInstr_MsaVec(MSA_NORV, v_dst, v_dst, v_dst)); + return v_dst; + } + case Iop_CmpNEZ32x4: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + HReg zero = Zero(mode64); + addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, zero, v_dst)); + addInstr(env, + MIPSInstr_Msa3R(MSA_CEQ, MSA_W, v_dst, v_src, v_dst)); + addInstr(env, MIPSInstr_MsaVec(MSA_NORV, v_dst, v_dst, v_dst)); + return v_dst; + } + case Iop_CmpNEZ64x2: { + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_dst = newVRegV(env); + HReg zero = Zero(mode64); + addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, zero, v_dst)); + addInstr(env, + MIPSInstr_Msa3R(MSA_CEQ, MSA_D, v_dst, v_src, v_dst)); + addInstr(env, MIPSInstr_MsaVec(MSA_NORV, v_dst, v_dst, v_dst)); + return v_dst; + } + default: + vex_printf("iselV128Expr_wrk: Unsupported unop: %u\n", op_unop); + } + } + + if (e->tag == Iex_Binop) { + IROp op_binop = e->Iex.Binop.op; + + switch (op_binop) { + case Iop_Add8x16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ADDV, MSA_B, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Add16x8: { + HReg v_dst 
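/* The integer Add and Sub lane ops map one-for-one onto ADDV/SUBV; only
   the MSA data format (B/H/W/D) tracks the lane width, and results wrap
   modulo the lane size:

       dst[i] = (uint16_t)(a[i] + b[i]);   // ADDV.H for Add16x8
*/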
= newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ADDV, MSA_H, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Add32x4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ADDV, MSA_W, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Add64x2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ADDV, MSA_D, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Sub8x16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SUBV, MSA_B, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Sub16x8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SUBV, MSA_H, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Sub32x4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SUBV, MSA_W, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Sub64x2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SUBV, MSA_D, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_QAdd8Sx16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ADDS_S, MSA_B, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_QAdd16Sx8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ADDS_S, MSA_H, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_QAdd32Sx4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ADDS_S, MSA_W, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_QAdd64Sx2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ADDS_S, MSA_D, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_QAdd8Ux16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ADDS_U, MSA_B, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_QAdd16Ux8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ADDS_U, MSA_H, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_QAdd32Ux4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + 
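/* The QAdd/QSub family likewise maps directly onto the saturating
   ADDS_S/ADDS_U and SUBS_S/SUBS_U forms.  Unsigned lane model for this
   Iop_QAdd32Ux4 case:

       uint32_t qadd_u32(uint32_t a, uint32_t b) {
           uint32_t s = a + b;
           return s < a ? 0xFFFFFFFFu : s;   // clamp on carry-out
       }
*/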
MIPSInstr_Msa3R(MSA_ADDS_U, MSA_W, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_QAdd64Ux2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ADDS_U, MSA_D, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_QSub8Sx16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SUBS_S, MSA_B, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_QSub16Sx8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SUBS_S, MSA_H, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_QSub32Sx4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SUBS_S, MSA_W, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_QSub64Sx2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SUBS_S, MSA_D, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_QSub8Ux16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SUBS_U, MSA_B, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_QSub16Ux8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SUBS_U, MSA_H, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_QSub32Ux4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SUBS_U, MSA_W, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_QSub64Ux2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SUBS_U, MSA_D, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_QDMulHi32Sx4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3RF(MSA_MUL_Q, MSA_F_DW, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_QDMulHi16Sx8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3RF(MSA_MUL_Q, MSA_F_WH, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_QRDMulHi32Sx4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3RF(MSA_MULR_Q, MSA_F_DW, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_QRDMulHi16Sx8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3RF(MSA_MULR_Q, MSA_F_WH, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case 
Iop_Max8Sx16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_MAX_S, MSA_B, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Max16Sx8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_MAX_S, MSA_H, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Max32Sx4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_MAX_S, MSA_W, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Max64Sx2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_MAX_S, MSA_D, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Max8Ux16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_MAX_U, MSA_B, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Max16Ux8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_MAX_U, MSA_H, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Max32Ux4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_MAX_U, MSA_W, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Max64Ux2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_MAX_U, MSA_D, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Min8Sx16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_MIN_S, MSA_B, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Min16Sx8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_MIN_S, MSA_H, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Min32Sx4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_MIN_S, MSA_W, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Min64Sx2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_MIN_S, MSA_D, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Min8Ux16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_MIN_U, MSA_B, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Min16Ux8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, 
e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_MIN_U, MSA_H, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Min32Ux4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_MIN_U, MSA_W, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Min64Ux2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_MIN_U, MSA_D, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Shl8x16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SLL, MSA_B, v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Shl16x8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SLL, MSA_H, v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Shl32x4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SLL, MSA_W, v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Shl64x2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SLL, MSA_D, v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Shr8x16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SRL, MSA_B, v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Shr16x8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SRL, MSA_H, v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Shr32x4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SRL, MSA_W, v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Shr64x2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SRL, MSA_D, v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Sar8x16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SRA, MSA_B, v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Sar16x8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SRA, MSA_H, v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Sar32x4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SRA, MSA_W, v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Sar64x2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = 
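/* The register-shift forms (SLL/SRL/SRA) take a per-lane shift amount
   from the second source vector, interpreted modulo the lane width.
   For this Iop_Sar64x2 case, with a[] holding int64_t lanes:

       dst[i] = a[i] >> (b[i] & 63);   // arithmetic shift, per lane
*/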
iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SRA, MSA_D, v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_InterleaveHI8x16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVL, MSA_B, v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_InterleaveHI16x8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVL, MSA_H, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_InterleaveHI32x4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVL, MSA_W, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_InterleaveHI64x2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVL, MSA_D, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_InterleaveLO8x16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVR, MSA_B, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_InterleaveLO16x8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVR, MSA_H, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_InterleaveLO32x4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVR, MSA_W, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_InterleaveLO64x2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVR, MSA_D, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_InterleaveEvenLanes8x16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVEV, MSA_B, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_InterleaveEvenLanes16x8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVEV, MSA_H, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_InterleaveEvenLanes32x4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVEV, MSA_W, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_InterleaveOddLanes8x16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVOD, MSA_B, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_InterleaveOddLanes16x8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, 
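/* Interleave mapping: InterleaveHI -> ILVL (most-significant halves),
   InterleaveLO -> ILVR (least-significant halves), and
   InterleaveEven/OddLanes -> ILVEV/ILVOD.  For this odd-lane 16x8 case,
   lanes numbered from the LSB end:

       for (i = 0; i < 8; i += 2) {
           dst[i]     = arg2[i + 1];   // odd lane of wt
           dst[i + 1] = arg1[i + 1];   // odd lane of ws
       }
*/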
e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVOD, MSA_H, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_InterleaveOddLanes32x4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_ILVOD, MSA_W, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_PackEvenLanes8x16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_PCKEV, MSA_B, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_PackEvenLanes16x8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_PCKEV, MSA_H, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_PackEvenLanes32x4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_PCKEV, MSA_W, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_PackOddLanes8x16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_PCKOD, MSA_B, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_PackOddLanes16x8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_PCKOD, MSA_H, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_PackOddLanes32x4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_PCKOD, MSA_W, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_CmpEQ8x16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_CEQ, MSA_B, v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_CmpEQ16x8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_CEQ, MSA_H, v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_CmpEQ32x4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_CEQ, MSA_W, v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_CmpEQ64x2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_CEQ, MSA_D, v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_CmpGT8Sx16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_CLT_S, MSA_B, + v_dst, v_src2, v_src1)); + return v_dst; + } + + case Iop_CmpGT16Sx8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + 
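/* MSA has no greater-than compare, so every CmpGT case is emitted as
   CLT with the operands swapped: a > b iff b < a.  The result is a lane
   mask, all ones where the relation holds:

       int16_t cmpgt_s16(int16_t a, int16_t b) {
           return (b < a) ? -1 : 0;   // what CLT_S.H(b, a) yields
       }
*/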
MIPSInstr_Msa3R(MSA_CLT_S, MSA_H, + v_dst, v_src2, v_src1)); + return v_dst; + } + + case Iop_CmpGT32Sx4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_CLT_S, MSA_W, + v_dst, v_src2, v_src1)); + return v_dst; + } + + case Iop_CmpGT64Sx2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_CLT_S, MSA_D, + v_dst, v_src2, v_src1)); + return v_dst; + } + + case Iop_CmpGT8Ux16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_CLT_U, MSA_B, + v_dst, v_src2, v_src1)); + return v_dst; + } + + case Iop_CmpGT16Ux8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_CLT_U, MSA_H, + v_dst, v_src2, v_src1)); + return v_dst; + } + + case Iop_CmpGT32Ux4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_CLT_U, MSA_W, + v_dst, v_src2, v_src1)); + return v_dst; + } + + case Iop_CmpGT64Ux2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_CLT_U, MSA_D, + v_dst, v_src2, v_src1)); + return v_dst; + } + + case Iop_Avg8Sx16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_AVER_S, MSA_B, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Avg16Sx8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_AVER_S, MSA_H, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Avg32Sx4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_AVER_S, MSA_W, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Avg8Ux16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_AVER_U, MSA_B, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Avg16Ux8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_AVER_U, MSA_H, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Avg32Ux4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_AVER_U, MSA_W, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Mul8x16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_MULV, MSA_B, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Mul16x8: { + HReg v_dst = 
newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_MULV, MSA_H, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Mul32x4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_MULV, MSA_W, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_AndV128: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, MIPSInstr_MsaVec(MSA_ANDV, v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_OrV128: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, MIPSInstr_MsaVec(MSA_ORV, v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_XorV128: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, MIPSInstr_MsaVec(MSA_XORV, v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_ShrV128: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + MIPSRH *sm; + sm = iselWordExpr_RH7u(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SUBV, MSA_B, + v_dst, v_src1, v_src1)); + + if (sm->tag == Mrh_Imm) { + int n = (sm->Mrh.Imm.imm16) >> 3; + addInstr(env, + MIPSInstr_MsaElm(MSA_SLDI, v_src1, v_dst, + MSA_DFN_B | n)); + } else { + HReg v_src2 = sm->Mrh.Reg.reg; + MIPSRH *ri = MIPSRH_Imm(False, 3); + HReg r_dst = newVRegI(env); + addInstr(env, MIPSInstr_Shft(Mshft_SRL, True /*32bit shift */, + r_dst, v_src2, ri)); + addInstr(env, + MIPSInstr_Msa3R(MSA_SLD, MSA_B, + v_dst, v_src1, r_dst)); + } + + return v_dst; + } + + case Iop_ShlV128: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + MIPSRH *sm; + sm = iselWordExpr_RH7u(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3R(MSA_SUBV, MSA_B, + v_dst, v_src1, v_src1)); + + if (sm->tag == Mrh_Imm) { + int n = 16 - ((sm->Mrh.Imm.imm16) >> 3); + + if (n == 16) n = 0; + + addInstr(env, + MIPSInstr_MsaElm(MSA_SLDI, v_dst, v_src1, + MSA_DFN_B | n)); + } else { + HReg v_src2 = sm->Mrh.Reg.reg; + MIPSRH *ri = MIPSRH_Imm(False, 3); + HReg r_dst = newVRegI(env); + HReg help = newVRegI(env); + addInstr(env, MIPSInstr_Alu(Malu_XOR, help, v_src2, sm)); + addInstr(env, MIPSInstr_Alu(Malu_SUB, help, help, sm)); + addInstr(env, MIPSInstr_Shft(Mshft_SRL, True /*32bit shift */, + r_dst, help, ri)); + addInstr(env, + MIPSInstr_Msa3R(MSA_SLD, MSA_B, + v_src1, v_dst, r_dst)); + } + + return v_src1; + } + + case Iop_ShlN8x16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + vassert(e->Iex.Binop.arg2->tag == Iex_Const); + vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); + vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63); + addInstr(env, + MIPSInstr_MsaBit(MSA_SLLI, MSA_B, + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8, + v_src1, v_dst)); + return v_dst; + } + + case Iop_ShlN16x8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + vassert(e->Iex.Binop.arg2->tag == Iex_Const); + vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); + vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63); + addInstr(env, + MIPSInstr_MsaBit(MSA_SLLI, MSA_H, + 
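/* (On ShrV128/ShlV128 just above.)  Whole-vector shifts only ever move
   a multiple of 8 bits, so they are done as byte slides: the
   destination is first zeroed (SUBV x,x), then SLDI selects bytes from
   the concatenation of {zero, source} starting n = shift/8 bytes in; a
   register amount uses SLD with the byte count obtained by SRL #3.
   Byte-level model of the right shift:

       for (i = 0; i < 16; i++)
           dst[i] = (i + n < 16) ? src[i + n] : 0;
*/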
e->Iex.Binop.arg2->Iex.Const.con->Ico.U8, + v_src1, v_dst)); + return v_dst; + } + + case Iop_ShlN32x4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + vassert(e->Iex.Binop.arg2->tag == Iex_Const); + vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); + vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63); + addInstr(env, + MIPSInstr_MsaBit(MSA_SLLI, MSA_W, + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8, + v_src1, v_dst)); + return v_dst; + } + + case Iop_ShlN64x2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + vassert(e->Iex.Binop.arg2->tag == Iex_Const); + vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); + vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63); + addInstr(env, + MIPSInstr_MsaBit(MSA_SLLI, MSA_D, + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8, + v_src1, v_dst)); + return v_dst; + } + + case Iop_SarN8x16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + vassert(e->Iex.Binop.arg2->tag == Iex_Const); + vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); + vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63); + addInstr(env, + MIPSInstr_MsaBit(MSA_SRAI, MSA_B, + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8, + v_src1, v_dst)); + return v_dst; + } + + case Iop_SarN16x8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + vassert(e->Iex.Binop.arg2->tag == Iex_Const); + vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); + vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63); + addInstr(env, + MIPSInstr_MsaBit(MSA_SRAI, MSA_H, + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8, + v_src1, v_dst)); + return v_dst; + } + + case Iop_SarN32x4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + vassert(e->Iex.Binop.arg2->tag == Iex_Const); + vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); + vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63); + addInstr(env, + MIPSInstr_MsaBit(MSA_SRAI, MSA_W, + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8, + v_src1, v_dst)); + return v_dst; + } + + case Iop_SarN64x2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + vassert(e->Iex.Binop.arg2->tag == Iex_Const); + vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); + vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63); + addInstr(env, + MIPSInstr_MsaBit(MSA_SRAI, MSA_D, + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8, + v_src1, v_dst)); + return v_dst; + } + + case Iop_ShrN8x16: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + vassert(e->Iex.Binop.arg2->tag == Iex_Const); + vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); + vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63); + addInstr(env, + MIPSInstr_MsaBit(MSA_SRLI, MSA_B, + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8, + v_src1, v_dst)); + return v_dst; + } + + case Iop_ShrN16x8: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + vassert(e->Iex.Binop.arg2->tag == Iex_Const); + vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); + vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63); + addInstr(env, + MIPSInstr_MsaBit(MSA_SRLI, MSA_H, + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8, + v_src1, v_dst)); + return v_dst; + } + + case Iop_ShrN32x4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + vassert(e->Iex.Binop.arg2->tag == Iex_Const); + 
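/* All the ShlN/SarN/ShrN cases require a compile-time constant shift
   amount - these vasserts enforce an Ico_U8 - and hand it straight to
   the MsaBit immediate encodings (SLLI/SRAI/SRLI).  Lane model for this
   Iop_ShrN32x4 case, a[] holding uint32_t lanes:

       dst[i] = a[i] >> imm;   // logical shift by the constant
*/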
vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); + vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63); + addInstr(env, + MIPSInstr_MsaBit(MSA_SRLI, MSA_W, + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8, + v_src1, v_dst)); + return v_dst; + } + + case Iop_ShrN64x2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + vassert(e->Iex.Binop.arg2->tag == Iex_Const); + vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); + vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63); + addInstr(env, + MIPSInstr_MsaBit(MSA_SRLI, MSA_D, + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8, + v_src1, v_dst)); + return v_dst; + } + + case Iop_QandQSarNnarrow64Sto32Sx2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + vassert(e->Iex.Binop.arg2->tag == Iex_Const); + vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); + vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63); + addInstr(env, + MIPSInstr_MsaBit(MSA_SRAI, MSA_D, + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8, + v_src1, v_dst)); + addInstr(env, MIPSInstr_MsaBit(MSA_SAT_S, MSA_D, 31, v_dst, v_dst)); + return v_dst; + } + + case Iop_QandQSarNnarrow32Sto16Sx4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + vassert(e->Iex.Binop.arg2->tag == Iex_Const); + vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); + vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63); + addInstr(env, + MIPSInstr_MsaBit(MSA_SRAI, MSA_W, + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8, + v_src1, v_dst)); + addInstr(env, + MIPSInstr_MsaBit(MSA_SAT_S, MSA_W, 15, v_dst, v_dst)); + return v_dst; + } + + case Iop_QandQRSarNnarrow64Sto32Sx2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + vassert(e->Iex.Binop.arg2->tag == Iex_Const); + vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); + vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63); + addInstr(env, + MIPSInstr_MsaBit(MSA_SRARI, MSA_D, + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8, + v_src1, v_dst)); + addInstr(env, + MIPSInstr_MsaBit(MSA_SAT_S, MSA_D, 31, v_dst, v_dst)); + return v_dst; + } + + case Iop_QandQRSarNnarrow32Sto16Sx4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + vassert(e->Iex.Binop.arg2->tag == Iex_Const); + vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); + vassert(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 <= 63); + addInstr(env, + MIPSInstr_MsaBit(MSA_SRARI, MSA_W, + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8, + v_src1, v_dst)); + addInstr(env, + MIPSInstr_MsaBit(MSA_SAT_S, MSA_W, 15, v_dst, v_dst)); + return v_dst; + } + + case Iop_CmpEQ32Fx4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FCEQ, MSA_F_WH, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_CmpEQ64Fx2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FCEQ, MSA_F_DW, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_CmpLT32Fx4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FCLT, MSA_F_WH, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_CmpLT64Fx2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = 
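/* The vector FP compares (FCEQ/FCLT/FCLE/FCUN) are quiet and write a
   per-lane mask, all ones where the relation holds; FCUN is true
   exactly when either operand is NaN.  Model for this CmpLT64Fx2:

       uint64_t cmplt_f64(double a, double b) {
           return (a < b) ? ~0ULL : 0ULL;   // NaN operands give 0
       }
*/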
iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FCLT, MSA_F_DW, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_CmpLE32Fx4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FCLE, MSA_F_WH, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_CmpLE64Fx2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FCLE, MSA_F_DW, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_CmpUN32Fx4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FCUN, MSA_F_WH, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_CmpUN64Fx2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FCUN, MSA_F_DW, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_64HLtoV128: { + HReg v_dst = newVRegV(env); + + if (mode64) { + HReg r_src1; + HReg r_src2; + r_src1 = iselWordExpr_R(env, e->Iex.Binop.arg1); + r_src2 = iselWordExpr_R(env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa2R(MSA_FILL, MSA_D, r_src2, v_dst)); + addInstr(env, + MIPSInstr_MsaElm(MSA_INSERT, r_src1, v_dst, + MSA_DFN_D | 1)); + } else { + HReg r_src1h, r_src1l; + HReg r_src2h, r_src2l; + iselInt64Expr(&r_src1h, &r_src1l, env, e->Iex.Binop.arg1); + iselInt64Expr(&r_src2h, &r_src2l, env, e->Iex.Binop.arg2); + addInstr(env, + MIPSInstr_Msa2R(MSA_FILL, MSA_W, r_src2l, v_dst)); + addInstr(env, + MIPSInstr_MsaElm(MSA_INSERT, r_src2h, v_dst, + MSA_DFN_W | 1)); + addInstr(env, + MIPSInstr_MsaElm(MSA_INSERT, r_src1l, v_dst, + MSA_DFN_W | 2)); + addInstr(env, + MIPSInstr_MsaElm(MSA_INSERT, r_src1h, v_dst, + MSA_DFN_W | 3)); + } + + return v_dst; + } + + case Iop_Min32Fx4: { + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + HReg v_dst = newVRegV(env); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FMIN, MSA_F_WH, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Min64Fx2: { + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + HReg v_dst = newVRegV(env); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FMIN, MSA_F_DW, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Max32Fx4: { + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + HReg v_dst = newVRegV(env); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FMAX, MSA_F_WH, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Max64Fx2: { + HReg v_src1 = iselV128Expr(env, e->Iex.Binop.arg1); + HReg v_src2 = iselV128Expr(env, e->Iex.Binop.arg2); + HReg v_dst = newVRegV(env); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FMAX, MSA_F_DW, + v_dst, v_src1, v_src2)); + return v_dst; + } + + case Iop_Sqrt32Fx4: { + HReg v_src = iselV128Expr(env, e->Iex.Binop.arg2); + HReg v_dst = newVRegV(env); + set_MIPS_rounding_mode_MSA(env, e->Iex.Binop.arg1); + addInstr(env, + MIPSInstr_Msa2RF(MSA_FSQRT, MSA_F_WH, v_dst, v_src)); + set_MIPS_rounding_default_MSA(env); + return v_dst; + } + + case Iop_Sqrt64Fx2: { + HReg v_src = iselV128Expr(env, 
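/* Rounded FP ops carry their IRRoundingMode as the first operand; the
   recurring pattern is set_MIPS_rounding_mode_MSA(env, arg1), emit the
   MSA op (FSQRT.D here), then restore via
   set_MIPS_rounding_default_MSA().  In outline:

       old = swap_msacsr_rm(rm);   // swap_msacsr_rm is a stand-in
       d   = sqrt_per_lane(s);     // name, not a real helper
       swap_msacsr_rm(old);
*/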
e->Iex.Binop.arg2); + HReg v_dst = newVRegV(env); + set_MIPS_rounding_mode_MSA(env, e->Iex.Binop.arg1); + addInstr(env, + MIPSInstr_Msa2RF(MSA_FSQRT, MSA_F_DW, v_dst, v_src)); + set_MIPS_rounding_default_MSA(env); + return v_dst; + } + + default: + vex_printf("iselV128Expr_wrk: unsupported binop: %x\n", op_binop); + } + } + + if (e->tag == Iex_Triop) { + IROp op_triop = e->Iex.Triop.details->op; + + switch (op_triop) { + case Iop_Add32Fx4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2); + HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3); + set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FADD, MSA_F_WH, + v_dst, v_src1, v_src2)); + set_MIPS_rounding_default_MSA(env); + return v_dst; + } + + case Iop_Add64Fx2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2); + HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3); + set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FADD, MSA_F_DW, + v_dst, v_src1, v_src2)); + set_MIPS_rounding_default_MSA(env); + return v_dst; + } + + case Iop_Sub32Fx4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2); + HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3); + set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FSUB, MSA_F_WH, + v_dst, v_src1, v_src2)); + set_MIPS_rounding_default_MSA(env); + return v_dst; + } + + case Iop_Sub64Fx2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2); + HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3); + set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FSUB, MSA_F_DW, + v_dst, v_src1, v_src2)); + set_MIPS_rounding_default_MSA(env); + return v_dst; + } + + case Iop_Mul32Fx4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2); + HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3); + set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FMUL, MSA_F_WH, + v_dst, v_src1, v_src2)); + set_MIPS_rounding_default_MSA(env); + return v_dst; + } + + case Iop_Mul64Fx2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2); + HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3); + set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FMUL, MSA_F_DW, + v_dst, v_src1, v_src2)); + set_MIPS_rounding_default_MSA(env); + return v_dst; + } + + case Iop_Div32Fx4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2); + HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3); + set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FDIV, MSA_F_WH, + v_dst, v_src1, v_src2)); + set_MIPS_rounding_default_MSA(env); + return v_dst; + } + + case Iop_Div64Fx2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2); + HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3); + set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FDIV, MSA_F_DW, + v_dst, v_src1, v_src2)); + set_MIPS_rounding_default_MSA(env); + return v_dst; + } + + case Iop_F32x4_2toQ16x8: { + HReg v_dst = newVRegV(env); 
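/* F32x4_2toQ16x8 packs two F32x4 sources into eight signed Q15
   fixed-point lanes via FTQ.H, saturating and rounding under the mode
   from arg1.  Approximate per-lane model (edge cases elided):

       int16_t to_q15(float x) {
           float v = x * 32768.0f;            // scale by 2^15
           if (v >=  32767.0f) return  32767; // saturate high
           if (v <= -32768.0f) return -32768; // saturate low
           return (int16_t)lrintf(v);         // current rounding mode
       }
*/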
+ HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2); + HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3); + set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FTQ, MSA_F_WH, + v_dst, v_src1, v_src2)); + set_MIPS_rounding_default_MSA(env); + return v_dst; + } + + case Iop_F64x2_2toQ32x4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2); + HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3); + set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FTQ, MSA_F_DW, + v_dst, v_src1, v_src2)); + set_MIPS_rounding_default_MSA(env); + return v_dst; + } + + case Iop_Scale2_32Fx4: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2); + HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3); + set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FEXP2, MSA_F_WH, + v_dst, v_src1, v_src2)); + set_MIPS_rounding_default_MSA(env); + return v_dst; + } + + case Iop_Scale2_64Fx2: { + HReg v_dst = newVRegV(env); + HReg v_src1 = iselV128Expr(env, e->Iex.Triop.details->arg2); + HReg v_src2 = iselV128Expr(env, e->Iex.Triop.details->arg3); + set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1); + addInstr(env, + MIPSInstr_Msa3RF(MSA_FEXP2, MSA_F_DW, + v_dst, v_src1, v_src2)); + set_MIPS_rounding_default_MSA(env); + return v_dst; + } + + default: + vex_printf("iselV128Expr_wrk: unsupported triop: %x\n", op_triop); + } + } + + if (e->tag == Iex_Const) { + IRConst *con = e->Iex.Const.con; + + if (con->tag != Ico_V128) { + vpanic("iselV128Expr.const(mips)"); + } else { + HReg v_dst = newVRegV(env); + UShort val = con->Ico.V128; + HReg zero = Zero(mode64); + + switch (val) { + case 0: /* likely */ + addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, zero, v_dst)); + break; + + default: { + HReg r_tmp = newVRegI(env); + UInt i; + addInstr(env, MIPSInstr_LI(r_tmp, 0xfful)); + + if (val & 1) { + addInstr(env, + MIPSInstr_Msa2R(MSA_FILL, MSA_B, r_tmp, v_dst)); + } else { + addInstr(env, + MIPSInstr_Msa2R(MSA_FILL, MSA_B, zero, v_dst)); + } + + for (i = 1; i < 16; i++) { + val >>= 1; + + if (val & 1) { + addInstr(env, + MIPSInstr_MsaElm(MSA_INSERT, r_tmp, v_dst, + MSA_DFN_B | i)); + } else { + addInstr(env, + MIPSInstr_MsaElm(MSA_INSERT, zero, v_dst, + MSA_DFN_B | i)); + } + } + + break; + } + } + + return v_dst; + } + } + + if (e->tag == Iex_ITE) { + HReg v_dst = newVRegV(env); + HReg iff = iselV128Expr(env, e->Iex.ITE.iffalse); + HReg ift = iselV128Expr(env, e->Iex.ITE.iftrue); + HReg r_cond = iselWordExpr_R(env, e->Iex.ITE.cond); + addInstr(env, MIPSInstr_Shft(Mshft_SLL, True, r_cond, r_cond, + MIPSRH_Imm(False, 1))); + addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, r_cond, v_dst)); + addInstr(env, + MIPSInstr_Alu(Malu_ADD, r_cond, r_cond, MIPSRH_Imm(True, 1))); + addInstr(env, MIPSInstr_MsaElm(MSA_INSERT, r_cond, v_dst, MSA_DFN_W | 2)); + addInstr(env, MIPSInstr_Msa3R(MSA_VSHF, MSA_D, v_dst, ift, iff)); + return v_dst; + } + + vex_printf("iselV128Expr_wrk: Unsupported tag: %x\n", e->tag); + ppIRExpr(e); + vpanic("iselV128Expr(mips)"); +} + +/*---------------------------------------------------------*/ +/*--- ISEL: Integer expressions (128 bit) ---*/ +/*---------------------------------------------------------*/ + +/* 64-bit mode ONLY: compute a 128-bit value into a register pair, + which is returned as the first two parameters. 
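+   (The only Ity_I128 producers handled below are the 64x64->128
+   multiplies, Iop_64HLto128 re-packing, and the 64-bit div/mod ops.)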
As with + iselWordExpr_R, these may be either real or virtual regs; in any + case they must not be changed by subsequent code emitted by the + caller. */ + +static void iselInt128Expr(HReg * rHi, HReg * rLo, ISelEnv * env, IRExpr * e) +{ + vassert(env->mode64); + iselInt128Expr_wrk(rHi, rLo, env, e); + vassert(hregClass(*rHi) == HRcGPR(env->mode64)); + vassert(hregIsVirtual(*rHi)); + vassert(hregClass(*rLo) == HRcGPR(env->mode64)); + vassert(hregIsVirtual(*rLo)); +} + +/* DO NOT CALL THIS DIRECTLY ! */ +static void iselInt128Expr_wrk(HReg * rHi, HReg * rLo, ISelEnv * env, + IRExpr * e) +{ + vassert(e); + vassert(typeOfIRExpr(env->type_env, e) == Ity_I128); + + /* read 128-bit IRTemp */ + if (e->tag == Iex_RdTmp) { + lookupIRTempPair(rHi, rLo, env, e->Iex.RdTmp.tmp); + return; + } + + /* --------- BINARY ops --------- */ + if (e->tag == Iex_Binop) { + switch (e->Iex.Binop.op) { + /* 64 x 64 -> 128 multiply */ + case Iop_MullU64: + case Iop_MullS64: { + HReg tLo = newVRegI(env); + HReg tHi = newVRegI(env); + Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64); + HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1); + HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2); + addInstr(env, MIPSInstr_Mult(syned, r_srcL, r_srcR)); + addInstr(env, MIPSInstr_Mfhi(tHi)); + addInstr(env, MIPSInstr_Mflo(tLo)); + *rHi = tHi; + *rLo = tLo; + return; + } + + /* 64HLto128(e1,e2) */ + case Iop_64HLto128: + *rHi = iselWordExpr_R(env, e->Iex.Binop.arg1); + *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2); + return; + + case Iop_DivModU64to64: + case Iop_DivModS64to64: { + HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1); + HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2); + HReg tLo = newVRegI(env); + HReg tHi = newVRegI(env); + Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to64); + + addInstr(env, MIPSInstr_Div(syned, False, r_srcL, r_srcR)); + addInstr(env, MIPSInstr_Mfhi(tHi)); + addInstr(env, MIPSInstr_Mflo(tLo)); + *rHi = tHi; + *rLo = tLo; + return; + } default: break; @@ -2344,6 +4787,27 @@ static void iselInt64Expr_wrk(HReg * rHi, HReg * rLo, ISelEnv * env, IRExpr * e) return; } + if (e->tag == Iex_CCall) { + HReg r_dstH = newVRegI(env); + HReg r_dstL = newVRegI(env); + vassert(e->Iex.CCall.retty == Ity_I64); + + /* Marshal args, do the call, clear stack. 
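+         The 64-bit result is returned in the GPR2/GPR3 ($v0/$v1)
+         register pair, matching the RLPri_2Int assertion below.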
*/ + UInt addToSp = 0; + RetLoc rloc = mk_RetLoc_INVALID(); + doHelperCall(&addToSp, &rloc, env, NULL/*guard*/, e->Iex.CCall.cee, + e->Iex.CCall.retty, e->Iex.CCall.args ); + + vassert(is_sane_RetLoc(rloc)); + vassert(rloc.pri == RLPri_2Int); + vassert(addToSp == 0); + addInstr(env, mk_iMOVds_RR(r_dstL, hregMIPS_GPR2(False))); + addInstr(env, mk_iMOVds_RR(r_dstH, hregMIPS_GPR3(False))); + *rHi = r_dstH; + *rLo = r_dstL; + return; + } + /* --------- BINARY ops --------- */ if (e->tag == Iex_Binop) { IROp op_binop = e->Iex.Binop.op; @@ -2765,6 +5229,129 @@ static void iselInt64Expr_wrk(HReg * rHi, HReg * rLo, ISelEnv * env, IRExpr * e) return; } + case Iop_F64toI64U: { + HReg r_src; + HReg tmp = newVRegV(env); + vassert(has_msa); + r_src = iselDblExpr( env, e->Iex.Binop.arg2); + set_MIPS_rounding_mode_MSA(env, e->Iex.Binop.arg1); + addInstr(env, MIPSInstr_Msa2RF(MSA_FTINT_U, MSA_F_DW, tmp, r_src)); + HReg r_dsth = newVRegI(env); + HReg r_dstl = newVRegI(env); + addInstr(env, + MIPSInstr_MsaElm(MSA_COPY_S, tmp, r_dstl, MSA_DFN_W | 0)); + addInstr(env, + MIPSInstr_MsaElm(MSA_COPY_S, tmp, r_dsth, MSA_DFN_W | 1)); + *rHi = r_dsth; + *rLo = r_dstl; + set_MIPS_rounding_default_MSA(env); + return; + } + + case Iop_GetElem64x2: { + vassert(has_msa); + HReg v_src = iselV128Expr(env, e->Iex.Binop.arg1); + HReg r_dstHI = newVRegI(env); + HReg r_dstLO = newVRegI(env); + MIPSRH *tmp = iselWordExpr_RH(env, False, e->Iex.Binop.arg2); + + switch (tmp->tag) { + case Mrh_Imm: + addInstr(env, + MIPSInstr_MsaElm(MSA_COPY_S, v_src, r_dstHI, + MSA_DFN_W | + (((tmp->Mrh.Imm.imm16 & 0x01) << 1) + + 1))); + addInstr(env, + MIPSInstr_MsaElm(MSA_COPY_S, v_src, r_dstLO, + MSA_DFN_W | + ((tmp->Mrh.Imm.imm16 & 0x01) << 1))); + break; + + case Mrh_Reg: { + HReg v_tmp = newVRegV(env); + addInstr(env, + MIPSInstr_Msa3R(MSA_SPLAT, MSA_D, v_tmp, v_src, + tmp->Mrh.Reg.reg)); + addInstr(env, + MIPSInstr_MsaElm(MSA_COPY_S, v_tmp, r_dstHI, + MSA_DFN_W | 1)); + addInstr(env, + MIPSInstr_MsaElm(MSA_COPY_S, v_tmp, r_dstLO, + MSA_DFN_W)); + break; + } + } + + *rHi = r_dstHI; + *rLo = r_dstLO; + return; + } + + case Iop_Mul64: { + HReg a_L, a_H, b_L, b_H; + HReg dst_L = newVRegI(env); + HReg dst_H = newVRegI(env); + + iselInt64Expr(&a_H, &a_L, env, e->Iex.Binop.arg1); + iselInt64Expr(&b_H, &b_L, env, e->Iex.Binop.arg2); + addInstr(env, MIPSInstr_Mul(dst_H, a_H, b_L)); + addInstr(env, MIPSInstr_Mult(True, b_H, a_L)); + addInstr(env, MIPSInstr_Mflo(dst_L)); + addInstr(env, MIPSInstr_Alu(Malu_ADD, dst_H, dst_H, + MIPSRH_Reg(dst_L))); + addInstr(env, MIPSInstr_Mult(False, a_L, b_L)); + addInstr(env, MIPSInstr_Mfhi(dst_L)); + + addInstr(env, MIPSInstr_Alu(Malu_ADD, dst_H, dst_H, + MIPSRH_Reg(dst_L))); + addInstr(env, MIPSInstr_Mflo(dst_L)); + *rHi = dst_H; + *rLo = dst_L; + return; + } + + case Iop_DivS64: { + HReg src1_L, src1_H, src2_L, src2_H; + HReg dst_L = newVRegI(env); + HReg dst_H = newVRegI(env); + HReg tmp1 = newVRegV(env); + HReg tmp2 = newVRegV(env); + vassert(has_msa); + iselInt64Expr(&src1_H, &src1_L, env, e->Iex.Binop.arg1); + iselInt64Expr(&src2_H, &src2_L, env, e->Iex.Binop.arg2); + addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, src1_L, tmp1)); + addInstr(env, MIPSInstr_MsaElm(MSA_INSERT, src1_H, tmp1, MSA_DFN_W | 1)); + addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, src2_L, tmp2)); + addInstr(env, MIPSInstr_MsaElm(MSA_INSERT, src2_H, tmp2, MSA_DFN_W | 1)); + addInstr(env, MIPSInstr_Msa3R(MSA_DIVS, MSA_D, tmp1, tmp1, tmp2)); + addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, tmp1, dst_H, MSA_DFN_W | 1)); + addInstr(env, 
MIPSInstr_MsaElm(MSA_COPY_S, tmp1, dst_L, MSA_DFN_W | 0));
+            *rHi = dst_H;
+            *rLo = dst_L;
+            return;
+         }
+
+         case Iop_DivU64: {
+            HReg src1_L, src1_H, src2_L, src2_H;
+            HReg dst_L = newVRegI(env);
+            HReg dst_H = newVRegI(env);
+            HReg tmp1 = newVRegV(env);
+            HReg tmp2 = newVRegV(env);
+            vassert(has_msa);
+            iselInt64Expr(&src1_H, &src1_L, env, e->Iex.Binop.arg1);
+            iselInt64Expr(&src2_H, &src2_L, env, e->Iex.Binop.arg2);
+            addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, src1_L, tmp1));
+            addInstr(env, MIPSInstr_MsaElm(MSA_INSERT, src1_H, tmp1, MSA_DFN_W | 1));
+            addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, src2_L, tmp2));
+            addInstr(env, MIPSInstr_MsaElm(MSA_INSERT, src2_H, tmp2, MSA_DFN_W | 1));
+            addInstr(env, MIPSInstr_Msa3R(MSA_DIVU, MSA_D, tmp1, tmp1, tmp2));
+            addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, tmp1, dst_H, MSA_DFN_W | 1));
+            addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, tmp1, dst_L, MSA_DFN_W | 0));
+            *rHi = dst_H;
+            *rLo = dst_L;
+            return;
+         }
         default:
            break;
@@ -2793,6 +5380,25 @@ static void iselInt64Expr_wrk(HReg * rHi, HReg * rLo, ISelEnv * env, IRExpr * e)
         return;
      }

+      case Iop_8Sto64:
+      case Iop_16Sto64: {
+         HReg tLo = newVRegI(env);
+         HReg tHi = newVRegI(env);
+         HReg src = iselWordExpr_R(env, e->Iex.Unop.arg);
+         UInt no_bits = (e->Iex.Unop.op == Iop_8Sto64) ? 24 : 16;
+         addInstr(env, mk_iMOVds_RR(tLo, src));
+         addInstr(env, MIPSInstr_Shft(Mshft_SLL, True, tLo, tLo,
+                                      MIPSRH_Imm(False, no_bits)));
+         addInstr(env, MIPSInstr_Shft(Mshft_SRA, True, tHi, tLo,
+                                      MIPSRH_Imm(False, 31)));
+         addInstr(env, MIPSInstr_Shft(Mshft_SRA, True, tLo, tLo,
+                                      MIPSRH_Imm(False, no_bits)));
+         *rHi = tHi;
+         *rLo = tLo;
+         return;
+      }
+
      /* 32Sto64(e) */
      case Iop_32Sto64: {
         HReg tLo = newVRegI(env);
@@ -2807,13 +5413,14 @@ static void iselInt64Expr_wrk(HReg * rHi, HReg * rLo, ISelEnv * env, IRExpr * e)
         return;
      }

-      /* 8Uto64(e) */
-      case Iop_8Uto64: {
+      case Iop_8Uto64:
+      case Iop_16Uto64: {
         HReg tLo = newVRegI(env);
         HReg tHi = newVRegI(env);
         HReg src = iselWordExpr_R(env, e->Iex.Unop.arg);
+         UInt mask = (e->Iex.Unop.op == Iop_8Uto64) ?
0xFF : 0xFFFF; addInstr(env, MIPSInstr_Alu(Malu_AND, tLo, src, - MIPSRH_Imm(False, 0xFF))); + MIPSRH_Imm(False, mask))); addInstr(env, MIPSInstr_Alu(Malu_ADD, tHi, hregMIPS_GPR0(mode64), MIPSRH_Reg(hregMIPS_GPR0(mode64)))); *rHi = tHi; @@ -2927,7 +5534,46 @@ static void iselInt64Expr_wrk(HReg * rHi, HReg * rLo, ISelEnv * env, IRExpr * e) *rHi = tHi; *rLo = tLo; + return; + } + + case Iop_V128HIto64: { + vassert(has_msa); + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg tLo = newVRegI(env); + HReg tHi = newVRegI(env); + addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, v_src, tLo, MSA_DFN_W | 2)); + addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, v_src, tHi, MSA_DFN_W | 3)); + *rLo = tLo; + *rHi = tHi; + return; + } + + case Iop_V128to64: { + vassert(has_msa); + HReg v_src = iselV128Expr(env, e->Iex.Unop.arg); + HReg tLo = newVRegI(env); + HReg tHi = newVRegI(env); + addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, v_src, tLo, MSA_DFN_W | 0)); + addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, v_src, tHi, MSA_DFN_W | 1)); + *rLo = tLo; + *rHi = tHi; + return; + } + case Iop_F32toF16x4: { + vassert(has_msa); + HReg v_arg = iselV128Expr(env, e->Iex.Unop.arg); + HReg v_src = newVRegV(env); + set_guest_MIPS_rounding_mode_MSA(env); + addInstr(env, MIPSInstr_Msa3RF(MSA_FEXDO, MSA_F_WH, v_src, v_arg, v_arg)); + set_MIPS_rounding_default_MSA(env); + HReg tLo = newVRegI(env); + HReg tHi = newVRegI(env); + addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, v_src, tLo, MSA_DFN_W | 0)); + addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, v_src, tHi, MSA_DFN_W | 1)); + *rLo = tLo; + *rHi = tHi; return; } @@ -3148,6 +5794,19 @@ static HReg iselFltExpr_wrk(ISelEnv * env, IRExpr * e) set_MIPS_rounding_default(env); return dst; } + case Iop_ScaleF64: { + HReg src1 = iselFltExpr(env, e->Iex.Triop.details->arg2); + HReg src2 = iselFltExpr(env, e->Iex.Triop.details->arg3); + HReg v_help = newVRegV(env); + HReg dst = newVRegF(env); + vassert(has_msa); + set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1); + addInstr(env, MIPSInstr_Msa2RF(MSA_FTINT_S, MSA_F_DW, v_help, src2)); + addInstr(env, MIPSInstr_Msa3RF(MSA_FEXP2, MSA_F_DW, dst, src1, v_help)); + set_MIPS_rounding_default_MSA(env); + + return dst; + } default: break; } @@ -3283,6 +5942,34 @@ static HReg iselFltExpr_wrk(ISelEnv * env, IRExpr * e) return dst; } + case Iop_I64UtoF64: { + vassert(mode64); + HReg r_dst = newVRegF(env); + HReg tmp = newVRegV(env); + HReg r_src; + vassert(has_msa); + r_src = iselWordExpr_R(env, e->Iex.Binop.arg2); + set_MIPS_rounding_mode_MSA(env, e->Iex.Binop.arg1); + addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_D, r_src, tmp)); + HReg r_srch = newVRegI(env); + addInstr(env, MIPSInstr_Msa2RF(MSA_FFINT_U, MSA_F_DW, tmp, tmp)); + addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, tmp, r_srch, MSA_DFN_D | 0)); + sub_from_sp(env, 8); + MIPSAMode *am_addr = MIPSAMode_IR(0, StackPointer(mode64)); + + /* store as I64 */ + addInstr(env, MIPSInstr_Store(8, am_addr, r_srch, mode64)); + + /* load as Ity_F64 */ + addInstr(env, MIPSInstr_FpLdSt(True /*load */, 8, r_dst, am_addr)); + + /* Reset SP */ + add_to_sp(env, 8); + set_MIPS_rounding_default_MSA(env); + return r_dst; + } + + default: break; } @@ -3294,31 +5981,45 @@ static HReg iselFltExpr_wrk(ISelEnv * env, IRExpr * e) case Iop_MAddF64: case Iop_MSubF32: case Iop_MSubF64: { - MIPSFpOp op = 0; + Int op = 0; + MSADFFlx type = 0; switch (e->Iex.Qop.details->op) { case Iop_MAddF32: - op = Mfp_MADDS; + op = has_msa ? MSA_FMADD : Mfp_MADDS; + type = MSA_F_WH; break; case Iop_MAddF64: - op = Mfp_MADDD; + op = has_msa ? 
MSA_FMADD : Mfp_MADDD; + type = MSA_F_DW; break; case Iop_MSubF32: - op = Mfp_MSUBS; + op = has_msa ? MSA_FMSUB : Mfp_MSUBS; + type = MSA_F_WH; break; case Iop_MSubF64: - op = Mfp_MSUBD; + op = has_msa ? MSA_FMSUB : Mfp_MSUBD; + type = MSA_F_DW; break; default: vassert(0); } + HReg dst = newVRegF(env); HReg src1 = iselFltExpr(env, e->Iex.Qop.details->arg2); HReg src2 = iselFltExpr(env, e->Iex.Qop.details->arg3); HReg src3 = iselFltExpr(env, e->Iex.Qop.details->arg4); - set_MIPS_rounding_mode(env, e->Iex.Qop.details->arg1); - addInstr(env, MIPSInstr_FpTernary(op, dst, - src1, src2, src3)); - set_MIPS_rounding_default(env); + + if (has_msa) { + addInstr(env, MIPSInstr_MsaElm(MSA_MOVE, src3, dst, 0)); + set_MIPS_rounding_mode_MSA(env, e->Iex.Qop.details->arg1); + addInstr(env, MIPSInstr_Msa3RF(op, type, dst, src1, src2)); + set_MIPS_rounding_default_MSA(env); + } else { + set_MIPS_rounding_mode(env, e->Iex.Qop.details->arg1); + addInstr(env, MIPSInstr_FpTernary(op, dst, + src1, src2, src3)); + set_MIPS_rounding_default(env); + } return dst; } @@ -3503,6 +6204,60 @@ static HReg iselDblExpr_wrk(ISelEnv * env, IRExpr * e) return dst; } + case Iop_I64StoF64: { + HReg r_dst = newVRegD(env); + MIPSAMode *am_addr; + HReg tmp, fr_src; + if (mode64) { + tmp = newVRegD(env); + fr_src = iselDblExpr(env, e->Iex.Binop.arg2); + /* Move SP down 8 bytes */ + sub_from_sp(env, 8); + am_addr = MIPSAMode_IR(0, StackPointer(mode64)); + + /* store as I64 */ + addInstr(env, MIPSInstr_Store(8, am_addr, fr_src, mode64)); + + /* load as Ity_F64 */ + addInstr(env, MIPSInstr_FpLdSt(True /*load */, 8, tmp, am_addr)); + + /* Reset SP */ + add_to_sp(env, 8); + } else { + HReg Hi, Lo; + tmp = newVRegD(env); + iselInt64Expr(&Hi, &Lo, env, e->Iex.Binop.arg2); + tmp = mk_LoadRR32toFPR(env, Hi, Lo); /* 2*I32 -> F64 */ + } + + set_MIPS_rounding_mode(env, e->Iex.Binop.arg1); + addInstr(env, MIPSInstr_FpConvert(Mfp_CVTDL, r_dst, tmp)); + set_MIPS_rounding_default(env); + + return r_dst; + } + + case Iop_I64UtoF64: { + HReg r_dst; + HReg tmp = newVRegV(env); + HReg r_src2h, r_src2l; + vassert(has_msa); + iselInt64Expr(&r_src2h, &r_src2l, env, e->Iex.Binop.arg2); + set_MIPS_rounding_mode_MSA(env, e->Iex.Binop.arg1); + addInstr(env, MIPSInstr_Msa2R(MSA_FILL, MSA_W, r_src2l, tmp)); + addInstr(env, MIPSInstr_MsaElm(MSA_INSERT, r_src2h, tmp, MSA_DFN_W | 1)); + addInstr(env, MIPSInstr_MsaElm(MSA_INSERT, r_src2l, tmp, MSA_DFN_W | 2)); + addInstr(env, MIPSInstr_MsaElm(MSA_INSERT, r_src2h, tmp, MSA_DFN_W | 3)); + HReg r_srchh = newVRegI(env); + HReg r_srchl = newVRegI(env); + addInstr(env, MIPSInstr_Msa2RF(MSA_FFINT_U, MSA_F_DW, tmp, tmp)); + addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, tmp, r_srchl, MSA_DFN_W | 0)); + addInstr(env, MIPSInstr_MsaElm(MSA_COPY_S, tmp, r_srchh, MSA_DFN_W | 1)); + r_dst = mk_LoadRR32toFPR(env, r_srchh, r_srchl); + set_MIPS_rounding_default_MSA(env); + return r_dst; + } + default: break; @@ -3544,30 +6299,36 @@ static HReg iselDblExpr_wrk(ISelEnv * env, IRExpr * e) set_MIPS_rounding_default(env); return dst; } + + case Iop_ScaleF64: { + HReg src1 = iselDblExpr(env, e->Iex.Triop.details->arg2); + HReg src2 = iselDblExpr(env, e->Iex.Triop.details->arg3); + HReg v_help = newVRegV(env); + HReg dst = newVRegD(env); + vassert(has_msa); + set_MIPS_rounding_mode_MSA(env, e->Iex.Triop.details->arg1); + addInstr(env, MIPSInstr_Msa2RF(MSA_FTINT_S, MSA_F_DW, v_help, src2)); + addInstr(env, MIPSInstr_Msa3RF(MSA_FEXP2, MSA_F_DW, dst, src1, v_help)); + set_MIPS_rounding_default_MSA(env); + return dst; + } default: break; } } if 
(e->tag == Iex_Qop) { + vassert(has_msa); switch (e->Iex.Qop.details->op) { - case Iop_MAddF32: case Iop_MAddF64: - case Iop_MSubF32: case Iop_MSubF64: { - MIPSFpOp op = 0; + MSA3RFOp op = 0; switch (e->Iex.Qop.details->op) { - case Iop_MAddF32: - op = Mfp_MADDS; - break; case Iop_MAddF64: - op = Mfp_MADDD; - break; - case Iop_MSubF32: - op = Mfp_MSUBS; + op = MSA_FMADD; break; case Iop_MSubF64: - op = Mfp_MSUBD; + op = MSA_FMSUB; break; default: vassert(0); @@ -3576,10 +6337,10 @@ static HReg iselDblExpr_wrk(ISelEnv * env, IRExpr * e) HReg src1 = iselDblExpr(env, e->Iex.Qop.details->arg2); HReg src2 = iselDblExpr(env, e->Iex.Qop.details->arg3); HReg src3 = iselDblExpr(env, e->Iex.Qop.details->arg4); - set_MIPS_rounding_mode(env, e->Iex.Qop.details->arg1); - addInstr(env, MIPSInstr_FpTernary(op, dst, - src1, src2, src3)); - set_MIPS_rounding_default(env); + addInstr(env, MIPSInstr_MsaElm(MSA_MOVE, src3, dst, 0)); + set_MIPS_rounding_mode_MSA(env, e->Iex.Qop.details->arg1); + addInstr(env, MIPSInstr_Msa3RF(op, MSA_F_DW, dst, src1, src2)); + set_MIPS_rounding_default_MSA(env); return dst; } @@ -3628,6 +6389,14 @@ static void iselStmt(ISelEnv * env, IRStmt * stmt) MIPSAMode *am_addr; IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data); + if (tyd == Ity_V128) { + vassert(has_msa); + HReg res = iselV128Expr(env, stmt->Ist.Store.data); + HReg addr = iselWordExpr_R(env, stmt->Ist.Store.addr); + addInstr(env, MIPSInstr_MsaMi10(MSA_ST, 0, addr, res, MSA_B)); + return; + } + /*constructs addressing mode from address provided */ am_addr = iselWordExpr_AMode(env, stmt->Ist.Store.addr, tyd); @@ -3718,6 +6487,21 @@ static void iselStmt(ISelEnv * env, IRStmt * stmt) am_addr)); return; } + if (ty == Ity_V128) { + vassert(has_msa); + HReg v_src = iselV128Expr(env, stmt->Ist.Put.data); +#if defined(_MIPSEB) + HReg r_addr = newVRegI(env); + addInstr(env, MIPSInstr_Alu(mode64 ? Malu_DADD : Malu_ADD, r_addr, GuestStatePointer(mode64), + MIPSRH_Imm(False, stmt->Ist.Put.offset))); + addInstr(env, MIPSInstr_MsaMi10(MSA_ST, 0, r_addr, v_src, MSA_B)); +#else + vassert(!(stmt->Ist.Put.offset & 7)); + addInstr(env, MIPSInstr_MsaMi10(MSA_ST, stmt->Ist.Put.offset >> 3, + GuestStatePointer(mode64), v_src, MSA_D)); +#endif + return; + } break; } @@ -3778,6 +6562,14 @@ static void iselStmt(ISelEnv * env, IRStmt * stmt) return; } } + + if (ty == Ity_V128) { + vassert(has_msa); + HReg v_dst = lookupIRTemp(env, tmp); + HReg v_src = iselV128Expr(env, stmt->Ist.WrTmp.data); + addInstr(env, MIPSInstr_MsaElm(MSA_MOVE, v_src, v_dst, 0)); + return; + } break; } @@ -3854,16 +6646,12 @@ static void iselStmt(ISelEnv * env, IRStmt * stmt) } } case Ity_V128: { - /* ATC. The code that this produces really - needs to be looked at, to verify correctness. - I don't think this can ever happen though, since the - MIPS front end never produces 128-bit loads/stores. */ - vassert(0); + vassert(has_msa); vassert(rloc.pri == RLPri_V128SpRel); + vassert((rloc.spOff < 512) && (rloc.spOff > -512)); vassert(addToSp >= 16); HReg dst = lookupIRTemp(env, d->tmp); - MIPSAMode* am = MIPSAMode_IR(rloc.spOff, StackPointer(mode64)); - addInstr(env, MIPSInstr_Load(mode64 ? 
8 : 4, dst, am, mode64)); + addInstr(env, MIPSInstr_MsaMi10(MSA_LD, rloc.spOff, StackPointer(mode64), dst, MSA_B)); add_to_sp(env, addToSp); return; @@ -4166,6 +6954,7 @@ HInstrArray *iselSB_MIPS ( const IRSB* bb, mode64 = arch_host != VexArchMIPS32; fp_mode64 = VEX_MIPS_HOST_FP_MODE(hwcaps_host); + has_msa = VEX_MIPS_PROC_MSA(archinfo_host->hwcaps); /* Make up an initial environment to use. */ env = LibVEX_Alloc_inline(sizeof(ISelEnv)); @@ -4233,6 +7022,9 @@ HInstrArray *iselSB_MIPS ( const IRSB* bb, case Ity_F64: hreg = mkHReg(True, HRcFlt64, 0, j++); break; + case Ity_V128: + hreg = mkHReg(True, HRcVec128, 0, j++); + break; default: ppIRType(bb->tyenv->types[i]); vpanic("iselBB(mips): IRTemp type"); diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c index 107a6a67b1..4ba1ab2523 100644 --- a/VEX/priv/main_main.c +++ b/VEX/priv/main_main.c @@ -1744,6 +1744,10 @@ static const HChar* show_hwcaps_mips32 ( UInt hwcaps ) { /* MIPS baseline. */ if (VEX_MIPS_COMP_ID(hwcaps) == VEX_PRID_COMP_MIPS) { + /* MIPS baseline with msa. */ + if (VEX_MIPS_PROC_MSA(hwcaps)) { + return "MIPS-baseline-msa"; + } /* MIPS baseline with dspr2. */ if (VEX_MIPS_PROC_DSP2(hwcaps)) { return "MIPS-baseline-dspr2"; @@ -1804,7 +1808,11 @@ static const HChar* show_hwcaps_mips64 ( UInt hwcaps ) /* MIPS64 baseline. */ if (VEX_MIPS_COMP_ID(hwcaps) == VEX_PRID_COMP_MIPS) { - return "mips64-baseline"; + /* MIPS baseline with msa. */ + if (VEX_MIPS_PROC_MSA(hwcaps)) { + return "MIPS64-baseline-msa"; + } + return "MIPS64-baseline"; } return "Unsupported baseline"; diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h index 8ae3e3648b..cd616299de 100644 --- a/VEX/pub/libvex.h +++ b/VEX/pub/libvex.h @@ -220,6 +220,7 @@ typedef */ #define VEX_PRID_IMP_34K 0x9500 #define VEX_PRID_IMP_74K 0x9700 +#define VEX_PRID_IMP_P5600 0xa800 /* * Instead of Company Options values, bits 31:24 will be packed with @@ -257,6 +258,11 @@ typedef ((VEX_MIPS_COMP_ID(x) == VEX_PRID_COMP_MIPS) && \ (VEX_MIPS_PROC_ID(x) == VEX_PRID_IMP_34K))) +/* Check if the processor supports MIPS MSA (SIMD)*/ +#define VEX_MIPS_PROC_MSA(x) ((VEX_MIPS_COMP_ID(x) == VEX_PRID_COMP_MIPS) && \ + (VEX_MIPS_PROC_ID(x) == VEX_PRID_IMP_P5600) && \ + (VEX_MIPS_HOST_FP_MODE(x))) + /* These return statically allocated strings. 
*/
extern const HChar* LibVEX_ppVexArch ( VexArch );

diff --git a/VEX/pub/libvex_guest_mips32.h b/VEX/pub/libvex_guest_mips32.h
index 0ac8d30e9e..c9291e9ede 100644
--- a/VEX/pub/libvex_guest_mips32.h
+++ b/VEX/pub/libvex_guest_mips32.h
@@ -154,7 +154,45 @@ typedef
      /* 492 */ UInt guest_LLaddr;
      /* 496 */ UInt guest_LLdata;
 
-     /* 500 */ UInt _padding2[3];
+     /* 500 */ UInt _padding2;
+
+     /* MIPS32 MSA 128-bit vector registers */
+     /* 504 */ V128 guest_w0;
+     /* 520 */ V128 guest_w1;
+     /* 536 */ V128 guest_w2;
+     /* 552 */ V128 guest_w3;
+     /* 568 */ V128 guest_w4;
+     /* 584 */ V128 guest_w5;
+     /* 600 */ V128 guest_w6;
+     /* 616 */ V128 guest_w7;
+     /* 632 */ V128 guest_w8;
+     /* 648 */ V128 guest_w9;
+     /* 664 */ V128 guest_w10;
+     /* 680 */ V128 guest_w11;
+     /* 696 */ V128 guest_w12;
+     /* 712 */ V128 guest_w13;
+     /* 728 */ V128 guest_w14;
+     /* 744 */ V128 guest_w15;
+     /* 760 */ V128 guest_w16;
+     /* 776 */ V128 guest_w17;
+     /* 792 */ V128 guest_w18;
+     /* 808 */ V128 guest_w19;
+     /* 824 */ V128 guest_w20;
+     /* 840 */ V128 guest_w21;
+     /* 856 */ V128 guest_w22;
+     /* 872 */ V128 guest_w23;
+     /* 888 */ V128 guest_w24;
+     /* 904 */ V128 guest_w25;
+     /* 920 */ V128 guest_w26;
+     /* 936 */ V128 guest_w27;
+     /* 952 */ V128 guest_w28;
+     /* 968 */ V128 guest_w29;
+     /* 984 */ V128 guest_w30;
+     /* 1000 */ V128 guest_w31;
+
+     /* 1016 */ UInt guest_MSACSR;
+
+     /* 1020 */ UInt _padding3;
 
 } VexGuestMIPS32State;
 
 /*---------------------------------------------------------------*/
 /*--- Utility functions for MIPS32 guest stuff.               ---*/
diff --git a/VEX/pub/libvex_guest_mips64.h b/VEX/pub/libvex_guest_mips64.h
index 792803ec6d..6a37f41848 100644
--- a/VEX/pub/libvex_guest_mips64.h
+++ b/VEX/pub/libvex_guest_mips64.h
@@ -151,7 +151,43 @@ typedef
      /* 616 */ ULong guest_LLaddr;
      /* 624 */ ULong guest_LLdata;
 
-     /* 632 */ ULong _padding2;
+     /* MIPS64 MSA 128-bit vector registers */
+     /* 632 */ V128 guest_w0;
+     /* 648 */ V128 guest_w1;
+     /* 664 */ V128 guest_w2;
+     /* 680 */ V128 guest_w3;
+     /* 696 */ V128 guest_w4;
+     /* 712 */ V128 guest_w5;
+     /* 728 */ V128 guest_w6;
+     /* 744 */ V128 guest_w7;
+     /* 760 */ V128 guest_w8;
+     /* 776 */ V128 guest_w9;
+     /* 792 */ V128 guest_w10;
+     /* 808 */ V128 guest_w11;
+     /* 824 */ V128 guest_w12;
+     /* 840 */ V128 guest_w13;
+     /* 856 */ V128 guest_w14;
+     /* 872 */ V128 guest_w15;
+     /* 888 */ V128 guest_w16;
+     /* 904 */ V128 guest_w17;
+     /* 920 */ V128 guest_w18;
+     /* 936 */ V128 guest_w19;
+     /* 952 */ V128 guest_w20;
+     /* 968 */ V128 guest_w21;
+     /* 984 */ V128 guest_w22;
+     /* 1000 */ V128 guest_w23;
+     /* 1016 */ V128 guest_w24;
+     /* 1032 */ V128 guest_w25;
+     /* 1048 */ V128 guest_w26;
+     /* 1064 */ V128 guest_w27;
+     /* 1080 */ V128 guest_w28;
+     /* 1096 */ V128 guest_w29;
+     /* 1112 */ V128 guest_w30;
+     /* 1128 */ V128 guest_w31;
+     /* 1144 */ UInt guest_MSACSR;
+
+     /* 1148 */ UInt _padding2;
+
 } VexGuestMIPS64State;
 
 /*---------------------------------------------------------------*/
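A note on checking the layouts above: the byte-offset annotations on the
new guest_wNN fields can be restated as compile-time assertions. The
sketch below is illustrative only and is not part of the patch; it
assumes a C11 compiler (for _Static_assert) and simply restates the
annotations from libvex_guest_mips64.h, so a failing assertion means the
offset comments have drifted from the real struct layout.

   #include <stddef.h>
   #include "libvex_guest_mips64.h"

   /* 32 V128 MSA registers, 16 bytes apart, starting at byte 632,
      followed by the 32-bit MSACSR at byte 1144, per the comments
      in the header above. */
   _Static_assert(offsetof(VexGuestMIPS64State, guest_w0) == 632,
                  "guest_w0 expected at byte offset 632");
   _Static_assert(offsetof(VexGuestMIPS64State, guest_w31) == 632 + 31 * 16,
                  "guest_w31 expected at byte offset 1128");
   _Static_assert(offsetof(VexGuestMIPS64State, guest_MSACSR) == 1144,
                  "guest_MSACSR expected at byte offset 1144");

The same pattern applies to libvex_guest_mips32.h (guest_w0 at 504,
guest_MSACSR at 1016). Note also that the little-endian Ist_Put path
earlier in the patch asserts that the Put offset is 8-byte aligned,
which both layouts satisfy since guest_w0 starts at a multiple of 8
in each header.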