From: Julian Seward
Date: Wed, 19 Oct 2011 15:24:01 +0000 (+0000)
Subject: Implement the SSE4.1 insn PCMPEQQ. n-i-bz. (VEX side changes)
X-Git-Tag: svn/VALGRIND_3_7_0^2~10
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b395d73a89be2e1a130ff70034a6ff2e75614171;p=thirdparty%2Fvalgrind.git

Implement the SSE4.1 insn PCMPEQQ. n-i-bz. (VEX side changes)
** MERGE TO AVX **

git-svn-id: svn://svn.valgrind.org/vex/trunk@2218
---

diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c
index c6ad6a1ed4..9fbe664724 100644
--- a/VEX/priv/guest_amd64_toIR.c
+++ b/VEX/priv/guest_amd64_toIR.c
@@ -16135,6 +16135,17 @@ DisResult disInstr_AMD64_WRK (
 goto decode_success;
 }
 
+ /* 66 0F 38 29 = PCMPEQQ
+ 64x2 equality comparison
+ */
+ if ( have66noF2noF3( pfx ) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x29) {
+ /* FIXME: this needs an alignment check */
+ delta = dis_SSEint_E_to_G( vbi, pfx, delta+3,
+ "pcmpeqq", Iop_CmpEQ64x2, False );
+ goto decode_success;
+ }
+
 /* ---------------------------------------------------- */
 /* --- end of the SSE4 decoder --- */
 /* ---------------------------------------------------- */
diff --git a/VEX/priv/host_amd64_isel.c b/VEX/priv/host_amd64_isel.c
index c887ca3a90..86c089d1a3 100644
--- a/VEX/priv/host_amd64_isel.c
+++ b/VEX/priv/host_amd64_isel.c
@@ -3658,6 +3658,8 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
 goto do_SseAssistedBinary;
 case Iop_Min8Sx16: fn = (HWord)h_generic_calc_Min8Sx16;
 goto do_SseAssistedBinary;
+ case Iop_CmpEQ64x2: fn = (HWord)h_generic_calc_CmpEQ64x2;
+ goto do_SseAssistedBinary;
 case Iop_CmpGT64Sx2: fn = (HWord)h_generic_calc_CmpGT64Sx2;
 goto do_SseAssistedBinary;
 case Iop_QNarrowBin32Sto16Ux8:
diff --git a/VEX/priv/host_generic_simd128.c b/VEX/priv/host_generic_simd128.c
index 4fd1df58ab..e29a2eb554 100644
--- a/VEX/priv/host_generic_simd128.c
+++ b/VEX/priv/host_generic_simd128.c
@@ -88,6 +88,12 @@ static inline UChar min8S ( Char xx, Char yy
)
 return toUChar((xx < yy) ? xx : yy);
 }
 
+static inline ULong cmpEQ64 ( Long xx, Long yy )
+{
+ return (((Long)xx) == ((Long)yy))
+ ? 0xFFFFFFFFFFFFFFFFULL : 0ULL;
+}
+
 static inline ULong cmpGT64S ( Long xx, Long yy )
 {
 return (((Long)xx) > ((Long)yy))
@@ -225,6 +231,13 @@ void h_generic_calc_Min8Sx16 ( /*OUT*/V128* res,
 res->w8[15] = min8S(argL->w8[15], argR->w8[15]);
 }
 
+void h_generic_calc_CmpEQ64x2 ( /*OUT*/V128* res,
+ V128* argL, V128* argR )
+{
+ res->w64[0] = cmpEQ64(argL->w64[0], argR->w64[0]);
+ res->w64[1] = cmpEQ64(argL->w64[1], argR->w64[1]);
+}
+
 void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128* res,
 V128* argL, V128* argR )
 {
diff --git a/VEX/priv/host_generic_simd128.h b/VEX/priv/host_generic_simd128.h
index ed8a7db68a..797617823c 100644
--- a/VEX/priv/host_generic_simd128.h
+++ b/VEX/priv/host_generic_simd128.h
@@ -57,6 +57,7 @@ extern void h_generic_calc_Max16Ux8 ( /*OUT*/V128*, V128*, V128* );
 extern void h_generic_calc_Min16Ux8 ( /*OUT*/V128*, V128*, V128* );
 extern void h_generic_calc_Max8Sx16 ( /*OUT*/V128*, V128*, V128* );
 extern void h_generic_calc_Min8Sx16 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_CmpEQ64x2 ( /*OUT*/V128*, V128*, V128* );
 extern void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128*, V128*, V128* );
 extern void h_generic_calc_SarN64x2 ( /*OUT*/V128*, V128*, UInt );
 extern void h_generic_calc_SarN8x16 ( /*OUT*/V128*, V128*, UInt );
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
index c604ad20c4..13667d7c79 100644
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -777,6 +777,7 @@ void ppIROp ( IROp op )
 case Iop_CmpEQ8x16: vex_printf("CmpEQ8x16"); return;
 case Iop_CmpEQ16x8: vex_printf("CmpEQ16x8"); return;
 case Iop_CmpEQ32x4: vex_printf("CmpEQ32x4"); return;
+ case Iop_CmpEQ64x2: vex_printf("CmpEQ64x2"); return;
 case Iop_CmpGT8Sx16: vex_printf("CmpGT8Sx16"); return;
 case Iop_CmpGT16Sx8: vex_printf("CmpGT16Sx8"); return;
 case Iop_CmpGT32Sx4: vex_printf("CmpGT32Sx4"); return;
@@ -2434,6 +2435,7 @@ void
typeOfPrimop ( IROp op,
 case Iop_Min8Sx16: case Iop_Min16Sx8: case Iop_Min32Sx4:
 case Iop_Min8Ux16: case Iop_Min16Ux8: case Iop_Min32Ux4:
 case Iop_CmpEQ8x16: case Iop_CmpEQ16x8: case Iop_CmpEQ32x4:
+ case Iop_CmpEQ64x2:
 case Iop_CmpGT8Sx16: case Iop_CmpGT16Sx8: case Iop_CmpGT32Sx4:
 case Iop_CmpGT64Sx2:
 case Iop_CmpGT8Ux16: case Iop_CmpGT16Ux8: case Iop_CmpGT32Ux4:
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
index 435090fc20..f8324697f0 100644
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -1170,7 +1170,7 @@ typedef
 Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4,
 
 /* COMPARISON */
- Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4,
+ Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2,
 Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2,
 Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4,