From: Julian Seward Date: Wed, 15 Jun 2011 15:09:37 +0000 (+0000) Subject: Partially fix underspecification of saturating narrowing primops that X-Git-Tag: svn/VALGRIND_3_7_0^2~65 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=aaa793ad9e8650e995231bdf9d259ec97deb531b;p=thirdparty%2Fvalgrind.git Partially fix underspecification of saturating narrowing primops that became apparent whilst looking into the problem of implementing the SSE4 packusdw instruction. Probably breaks Altivec. git-svn-id: svn://svn.valgrind.org/vex/trunk@2159 --- diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c index 39032b12d8..c76f5512e2 100644 --- a/VEX/priv/guest_amd64_toIR.c +++ b/VEX/priv/guest_amd64_toIR.c @@ -6504,9 +6504,9 @@ ULong dis_MMXop_regmem_to_reg ( VexAbiInfo* vbi, case 0x65: op = Iop_CmpGT16Sx4; break; case 0x66: op = Iop_CmpGT32Sx2; break; - case 0x6B: op = Iop_QNarrow32Sx2; eLeft = True; break; - case 0x63: op = Iop_QNarrow16Sx4; eLeft = True; break; - case 0x67: op = Iop_QNarrow16Ux4; eLeft = True; break; + case 0x6B: op = Iop_QNarrow32Sto16Sx4; eLeft = True; break; + case 0x63: op = Iop_QNarrow16Sto8Sx8; eLeft = True; break; + case 0x67: op = Iop_QNarrow16Sto8Ux8; eLeft = True; break; case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break; case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break; @@ -11786,7 +11786,8 @@ DisResult disInstr_AMD64_WRK ( if (have66noF2noF3(pfx) && sz == 2 && insn[0] == 0x0F && insn[1] == 0x6B) { delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, - "packssdw", Iop_QNarrow32Sx4, True ); + "packssdw", + Iop_QNarrow32Sto16Sx8, True ); goto decode_success; } @@ -11794,7 +11795,8 @@ DisResult disInstr_AMD64_WRK ( if (have66noF2noF3(pfx) && sz == 2 && insn[0] == 0x0F && insn[1] == 0x63) { delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, - "packsswb", Iop_QNarrow16Sx8, True ); + "packsswb", + Iop_QNarrow16Sto8Sx16, True ); goto decode_success; } @@ -11802,7 +11804,8 @@ DisResult disInstr_AMD64_WRK ( if 
(have66noF2noF3(pfx) && sz == 2 && insn[0] == 0x0F && insn[1] == 0x67) { delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, - "packuswb", Iop_QNarrow16Ux8, True ); + "packuswb", + Iop_QNarrow16Sto8Ux16, True ); goto decode_success; } diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c index 738fd13963..6cf7c9d4e2 100644 --- a/VEX/priv/guest_ppc_toIR.c +++ b/VEX/priv/guest_ppc_toIR.c @@ -9418,7 +9418,7 @@ static Bool dis_av_multarith ( UInt theInstr ) mkU8(15))) ); putVReg( vD_addr, - binop(Iop_QNarrow32Sx4, mkexpr(zHi), mkexpr(zLo)) ); + binop(Iop_QNarrow32Sto16Sx8, mkexpr(zHi), mkexpr(zLo)) ); break; } case 0x21: { // vmhraddshs (Mult High Round, Add Signed HW Saturate, AV p186) @@ -9452,7 +9452,8 @@ static Bool dis_av_multarith ( UInt theInstr ) mkexpr(aHi), mkexpr(bHi))), mkU8(15))) ); - putVReg( vD_addr, binop(Iop_QNarrow32Sx4, mkexpr(zHi), mkexpr(zLo)) ); + putVReg( vD_addr, + binop(Iop_QNarrow32Sto16Sx8, mkexpr(zHi), mkexpr(zLo)) ); break; } case 0x22: { // vmladduhm (Mult Low, Add Unsigned HW Modulo, AV p194) @@ -9965,14 +9966,14 @@ static Bool dis_av_pack ( UInt theInstr ) case 0x08E: // vpkuhus (Pack Unsigned HW Unsigned Saturate, AV p225) DIP("vpkuhus v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); putVReg( vD_addr, - binop(Iop_QNarrow16Ux8, mkexpr(vA), mkexpr(vB)) ); + binop(Iop_QNarrow16Uto8Ux16, mkexpr(vA), mkexpr(vB)) ); // TODO: set VSCR[SAT] return True; case 0x0CE: // vpkuwus (Pack Unsigned W Unsigned Saturate, AV p227) DIP("vpkuwus v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); putVReg( vD_addr, - binop(Iop_QNarrow32Ux4, mkexpr(vA), mkexpr(vB)) ); + binop(Iop_QNarrow32Uto16Ux8, mkexpr(vA), mkexpr(vB)) ); // TODO: set VSCR[SAT] return True; @@ -9991,7 +9992,7 @@ static Bool dis_av_pack ( UInt theInstr ) unop(Iop_NotV128, binop(Iop_SarN16x8, mkexpr(vB), mkU8(15)))) ); - putVReg( vD_addr, binop(Iop_QNarrow16Ux8, + putVReg( vD_addr, binop(Iop_QNarrow16Uto8Ux16, mkexpr(vA_tmp), mkexpr(vB_tmp)) ); // TODO: set VSCR[SAT] return True; @@ -10011,7 
+10012,7 @@ static Bool dis_av_pack ( UInt theInstr ) unop(Iop_NotV128, binop(Iop_SarN32x4, mkexpr(vB), mkU8(31)))) ); - putVReg( vD_addr, binop(Iop_QNarrow32Ux4, + putVReg( vD_addr, binop(Iop_QNarrow32Uto16Ux8, mkexpr(vA_tmp), mkexpr(vB_tmp)) ); // TODO: set VSCR[SAT] return True; @@ -10019,14 +10020,14 @@ static Bool dis_av_pack ( UInt theInstr ) case 0x18E: // vpkshss (Pack Signed HW Signed Saturate, AV p220) DIP("vpkshss v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); putVReg( vD_addr, - binop(Iop_QNarrow16Sx8, mkexpr(vA), mkexpr(vB)) ); + binop(Iop_QNarrow16Sto8Sx16, mkexpr(vA), mkexpr(vB)) ); // TODO: set VSCR[SAT] return True; case 0x1CE: // vpkswss (Pack Signed W Signed Saturate, AV p222) DIP("vpkswss v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); putVReg( vD_addr, - binop(Iop_QNarrow32Sx4, mkexpr(vA), mkexpr(vB)) ); + binop(Iop_QNarrow32Sto16Sx8, mkexpr(vA), mkexpr(vB)) ); // TODO: set VSCR[SAT] return True; diff --git a/VEX/priv/guest_x86_toIR.c b/VEX/priv/guest_x86_toIR.c index dc6c81cce1..c8398a859a 100644 --- a/VEX/priv/guest_x86_toIR.c +++ b/VEX/priv/guest_x86_toIR.c @@ -5475,9 +5475,9 @@ UInt dis_MMXop_regmem_to_reg ( UChar sorb, case 0x65: op = Iop_CmpGT16Sx4; break; case 0x66: op = Iop_CmpGT32Sx2; break; - case 0x6B: op = Iop_QNarrow32Sx2; eLeft = True; break; - case 0x63: op = Iop_QNarrow16Sx4; eLeft = True; break; - case 0x67: op = Iop_QNarrow16Ux4; eLeft = True; break; + case 0x6B: op = Iop_QNarrow32Sto16Sx4; eLeft = True; break; + case 0x63: op = Iop_QNarrow16Sto8Sx8; eLeft = True; break; + case 0x67: op = Iop_QNarrow16Sto8Ux8; eLeft = True; break; case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break; case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break; @@ -10532,21 +10532,24 @@ DisResult disInstr_X86_WRK ( /* 66 0F 6B = PACKSSDW */ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6B) { delta = dis_SSEint_E_to_G( sorb, delta+2, - "packssdw", Iop_QNarrow32Sx4, True ); + "packssdw", + Iop_QNarrow32Sto16Sx8, True ); goto decode_success; } /* 66 
0F 63 = PACKSSWB */ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x63) { delta = dis_SSEint_E_to_G( sorb, delta+2, - "packsswb", Iop_QNarrow16Sx8, True ); + "packsswb", + Iop_QNarrow16Sto8Sx16, True ); goto decode_success; } /* 66 0F 67 = PACKUSWB */ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x67) { delta = dis_SSEint_E_to_G( sorb, delta+2, - "packuswb", Iop_QNarrow16Ux8, True ); + "packuswb", + Iop_QNarrow16Sto8Ux16, True ); goto decode_success; } diff --git a/VEX/priv/host_amd64_isel.c b/VEX/priv/host_amd64_isel.c index 7e6cfe3eaa..5c84217cb4 100644 --- a/VEX/priv/host_amd64_isel.c +++ b/VEX/priv/host_amd64_isel.c @@ -1094,12 +1094,12 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) case Iop_QAdd16Ux4: fn = (HWord)h_generic_calc_QAdd16Ux4; break; - case Iop_QNarrow32Sx2: - fn = (HWord)h_generic_calc_QNarrow32Sx2; break; - case Iop_QNarrow16Sx4: - fn = (HWord)h_generic_calc_QNarrow16Sx4; break; - case Iop_QNarrow16Ux4: - fn = (HWord)h_generic_calc_QNarrow16Ux4; break; + case Iop_QNarrow32Sto16Sx4: + fn = (HWord)h_generic_calc_QNarrow32Sto16Sx4; break; + case Iop_QNarrow16Sto8Sx8: + fn = (HWord)h_generic_calc_QNarrow16Sto8Sx8; break; + case Iop_QNarrow16Sto8Ux8: + fn = (HWord)h_generic_calc_QNarrow16Sto8Ux8; break; case Iop_QSub8Sx8: fn = (HWord)h_generic_calc_QSub8Sx8; break; @@ -3544,11 +3544,11 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) return dst; } - case Iop_QNarrow32Sx4: + case Iop_QNarrow32Sto16Sx8: op = Asse_PACKSSD; arg1isEReg = True; goto do_SseReRg; - case Iop_QNarrow16Sx8: + case Iop_QNarrow16Sto8Sx16: op = Asse_PACKSSW; arg1isEReg = True; goto do_SseReRg; - case Iop_QNarrow16Ux8: + case Iop_QNarrow16Sto8Ux16: op = Asse_PACKUSW; arg1isEReg = True; goto do_SseReRg; case Iop_InterleaveHI8x16: diff --git a/VEX/priv/host_generic_simd64.c b/VEX/priv/host_generic_simd64.c index 03d6d2ff17..58ebc7bf48 100644 --- a/VEX/priv/host_generic_simd64.c +++ b/VEX/priv/host_generic_simd64.c @@ -272,7 +272,7 @@ static inline UChar cmpnez8 
( UChar xx ) return toUChar(xx==0 ? 0 : 0xFF); } -static inline Short qnarrow32Sto16 ( UInt xx0 ) +static inline Short qnarrow32Sto16S ( UInt xx0 ) { Int xx = (Int)xx0; if (xx < -32768) xx = -32768; @@ -280,7 +280,7 @@ static inline Short qnarrow32Sto16 ( UInt xx0 ) return (Short)xx; } -static inline Char qnarrow16Sto8 ( UShort xx0 ) +static inline Char qnarrow16Sto8S ( UShort xx0 ) { Short xx = (Short)xx0; if (xx < -128) xx = -128; @@ -288,7 +288,7 @@ static inline Char qnarrow16Sto8 ( UShort xx0 ) return (Char)xx; } -static inline UChar qnarrow16Uto8 ( UShort xx0 ) +static inline UChar qnarrow16Sto8U ( UShort xx0 ) { Short xx = (Short)xx0; if (xx < 0) xx = 0; @@ -759,21 +759,21 @@ ULong h_generic_calc_CmpNEZ8x8 ( ULong xx ) /* ------------ Saturating narrowing ------------ */ -ULong h_generic_calc_QNarrow32Sx2 ( ULong aa, ULong bb ) +ULong h_generic_calc_QNarrow32Sto16Sx4 ( ULong aa, ULong bb ) { UInt d = sel32x2_1(aa); UInt c = sel32x2_0(aa); UInt b = sel32x2_1(bb); UInt a = sel32x2_0(bb); return mk16x4( - qnarrow32Sto16(d), - qnarrow32Sto16(c), - qnarrow32Sto16(b), - qnarrow32Sto16(a) + qnarrow32Sto16S(d), + qnarrow32Sto16S(c), + qnarrow32Sto16S(b), + qnarrow32Sto16S(a) ); } -ULong h_generic_calc_QNarrow16Sx4 ( ULong aa, ULong bb ) +ULong h_generic_calc_QNarrow16Sto8Sx8 ( ULong aa, ULong bb ) { UShort h = sel16x4_3(aa); UShort g = sel16x4_2(aa); @@ -784,18 +784,18 @@ ULong h_generic_calc_QNarrow16Sx4 ( ULong aa, ULong bb ) UShort b = sel16x4_1(bb); UShort a = sel16x4_0(bb); return mk8x8( - qnarrow16Sto8(h), - qnarrow16Sto8(g), - qnarrow16Sto8(f), - qnarrow16Sto8(e), - qnarrow16Sto8(d), - qnarrow16Sto8(c), - qnarrow16Sto8(b), - qnarrow16Sto8(a) + qnarrow16Sto8S(h), + qnarrow16Sto8S(g), + qnarrow16Sto8S(f), + qnarrow16Sto8S(e), + qnarrow16Sto8S(d), + qnarrow16Sto8S(c), + qnarrow16Sto8S(b), + qnarrow16Sto8S(a) ); } -ULong h_generic_calc_QNarrow16Ux4 ( ULong aa, ULong bb ) +ULong h_generic_calc_QNarrow16Sto8Ux8 ( ULong aa, ULong bb ) { UShort h = sel16x4_3(aa); 
UShort g = sel16x4_2(aa); @@ -806,14 +806,14 @@ ULong h_generic_calc_QNarrow16Ux4 ( ULong aa, ULong bb ) UShort b = sel16x4_1(bb); UShort a = sel16x4_0(bb); return mk8x8( - qnarrow16Uto8(h), - qnarrow16Uto8(g), - qnarrow16Uto8(f), - qnarrow16Uto8(e), - qnarrow16Uto8(d), - qnarrow16Uto8(c), - qnarrow16Uto8(b), - qnarrow16Uto8(a) + qnarrow16Sto8U(h), + qnarrow16Sto8U(g), + qnarrow16Sto8U(f), + qnarrow16Sto8U(e), + qnarrow16Sto8U(d), + qnarrow16Sto8U(c), + qnarrow16Sto8U(b), + qnarrow16Sto8U(a) ); } diff --git a/VEX/priv/host_generic_simd64.h b/VEX/priv/host_generic_simd64.h index e854fc726e..05ac56047c 100644 --- a/VEX/priv/host_generic_simd64.h +++ b/VEX/priv/host_generic_simd64.h @@ -87,9 +87,9 @@ extern ULong h_generic_calc_CmpNEZ32x2 ( ULong ); extern ULong h_generic_calc_CmpNEZ16x4 ( ULong ); extern ULong h_generic_calc_CmpNEZ8x8 ( ULong ); -extern ULong h_generic_calc_QNarrow32Sx2 ( ULong, ULong ); -extern ULong h_generic_calc_QNarrow16Sx4 ( ULong, ULong ); -extern ULong h_generic_calc_QNarrow16Ux4 ( ULong, ULong ); +extern ULong h_generic_calc_QNarrow32Sto16Sx4 ( ULong, ULong ); +extern ULong h_generic_calc_QNarrow16Sto8Sx8 ( ULong, ULong ); +extern ULong h_generic_calc_QNarrow16Sto8Ux8 ( ULong, ULong ); extern ULong h_generic_calc_InterleaveHI8x8 ( ULong, ULong ); extern ULong h_generic_calc_InterleaveLO8x8 ( ULong, ULong ); diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c index 4ad9925390..f3885ebc5e 100644 --- a/VEX/priv/host_ppc_isel.c +++ b/VEX/priv/host_ppc_isel.c @@ -3678,11 +3678,11 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) case Iop_Shr16x8: op = Pav_SHR; goto do_AvBin16x8; case Iop_Sar16x8: op = Pav_SAR; goto do_AvBin16x8; case Iop_Rol16x8: op = Pav_ROTL; goto do_AvBin16x8; - case Iop_Narrow16x8: op = Pav_PACKUU; goto do_AvBin16x8; - case Iop_QNarrow16Ux8: op = Pav_QPACKUU; goto do_AvBin16x8; - case Iop_QNarrow16Sx8: op = Pav_QPACKSS; goto do_AvBin16x8; - case Iop_InterleaveHI16x8: op = Pav_MRGHI; goto do_AvBin16x8; - 
case Iop_InterleaveLO16x8: op = Pav_MRGLO; goto do_AvBin16x8; + case Iop_Narrow16x8: op = Pav_PACKUU; goto do_AvBin16x8; + case Iop_QNarrow16Uto8Ux16: op = Pav_QPACKUU; goto do_AvBin16x8; + case Iop_QNarrow16Sto8Sx16: op = Pav_QPACKSS; goto do_AvBin16x8; + case Iop_InterleaveHI16x8: op = Pav_MRGHI; goto do_AvBin16x8; + case Iop_InterleaveLO16x8: op = Pav_MRGLO; goto do_AvBin16x8; case Iop_Add16x8: op = Pav_ADDU; goto do_AvBin16x8; case Iop_QAdd16Ux8: op = Pav_QADDU; goto do_AvBin16x8; case Iop_QAdd16Sx8: op = Pav_QADDS; goto do_AvBin16x8; @@ -3712,11 +3712,11 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) case Iop_Shr32x4: op = Pav_SHR; goto do_AvBin32x4; case Iop_Sar32x4: op = Pav_SAR; goto do_AvBin32x4; case Iop_Rol32x4: op = Pav_ROTL; goto do_AvBin32x4; - case Iop_Narrow32x4: op = Pav_PACKUU; goto do_AvBin32x4; - case Iop_QNarrow32Ux4: op = Pav_QPACKUU; goto do_AvBin32x4; - case Iop_QNarrow32Sx4: op = Pav_QPACKSS; goto do_AvBin32x4; - case Iop_InterleaveHI32x4: op = Pav_MRGHI; goto do_AvBin32x4; - case Iop_InterleaveLO32x4: op = Pav_MRGLO; goto do_AvBin32x4; + case Iop_Narrow32x4: op = Pav_PACKUU; goto do_AvBin32x4; + case Iop_QNarrow32Uto16Ux8: op = Pav_QPACKUU; goto do_AvBin32x4; + case Iop_QNarrow32Sto16Sx8: op = Pav_QPACKSS; goto do_AvBin32x4; + case Iop_InterleaveHI32x4: op = Pav_MRGHI; goto do_AvBin32x4; + case Iop_InterleaveLO32x4: op = Pav_MRGLO; goto do_AvBin32x4; case Iop_Add32x4: op = Pav_ADDU; goto do_AvBin32x4; case Iop_QAdd32Ux4: op = Pav_QADDU; goto do_AvBin32x4; case Iop_QAdd32Sx4: op = Pav_QADDS; goto do_AvBin32x4; diff --git a/VEX/priv/host_x86_isel.c b/VEX/priv/host_x86_isel.c index db2b8bcd09..74552da6a6 100644 --- a/VEX/priv/host_x86_isel.c +++ b/VEX/priv/host_x86_isel.c @@ -2386,12 +2386,12 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) case Iop_QAdd16Ux4: fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish; - case Iop_QNarrow32Sx2: - fn = (HWord)h_generic_calc_QNarrow32Sx2; goto binnish; - 
case Iop_QNarrow16Sx4: - fn = (HWord)h_generic_calc_QNarrow16Sx4; goto binnish; - case Iop_QNarrow16Ux4: - fn = (HWord)h_generic_calc_QNarrow16Ux4; goto binnish; + case Iop_QNarrow32Sto16Sx4: + fn = (HWord)h_generic_calc_QNarrow32Sto16Sx4; goto binnish; + case Iop_QNarrow16Sto8Sx8: + fn = (HWord)h_generic_calc_QNarrow16Sto8Sx8; goto binnish; + case Iop_QNarrow16Sto8Ux8: + fn = (HWord)h_generic_calc_QNarrow16Sto8Ux8; goto binnish; case Iop_QSub8Sx8: fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish; @@ -3500,11 +3500,11 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) return dst; } - case Iop_QNarrow32Sx4: + case Iop_QNarrow32Sto16Sx8: op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg; - case Iop_QNarrow16Sx8: + case Iop_QNarrow16Sto8Sx16: op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg; - case Iop_QNarrow16Ux8: + case Iop_QNarrow16Sto8Ux16: op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg; case Iop_InterleaveHI8x16: diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c index 566187ad34..8799b44e3e 100644 --- a/VEX/priv/ir_defs.c +++ b/VEX/priv/ir_defs.c @@ -506,9 +506,9 @@ void ppIROp ( IROp op ) case Iop_SarN8x8: vex_printf("SarN8x8"); return; case Iop_SarN16x4: vex_printf("SarN16x4"); return; case Iop_SarN32x2: vex_printf("SarN32x2"); return; - case Iop_QNarrow16Ux4: vex_printf("QNarrow16Ux4"); return; - case Iop_QNarrow16Sx4: vex_printf("QNarrow16Sx4"); return; - case Iop_QNarrow32Sx2: vex_printf("QNarrow32Sx2"); return; + case Iop_QNarrow16Sto8Ux8: vex_printf("QNarrow16Sto8Ux8"); return; + case Iop_QNarrow16Sto8Sx8: vex_printf("QNarrow16Sto8Sx8"); return; + case Iop_QNarrow32Sto16Sx4: vex_printf("QNarrow32Sto16Sx4"); return; case Iop_InterleaveHI8x8: vex_printf("InterleaveHI8x8"); return; case Iop_InterleaveHI16x4: vex_printf("InterleaveHI16x4"); return; case Iop_InterleaveHI32x2: vex_printf("InterleaveHI32x2"); return; @@ -846,10 +846,10 @@ void ppIROp ( IROp op ) case Iop_Narrow16x8: vex_printf("Narrow16x8"); return; case 
Iop_Narrow32x4: vex_printf("Narrow32x4"); return; - case Iop_QNarrow16Ux8: vex_printf("QNarrow16Ux8"); return; - case Iop_QNarrow32Ux4: vex_printf("QNarrow32Ux4"); return; - case Iop_QNarrow16Sx8: vex_printf("QNarrow16Sx8"); return; - case Iop_QNarrow32Sx4: vex_printf("QNarrow32Sx4"); return; + case Iop_QNarrow16Sto8Ux16: vex_printf("QNarrow16Sto8Ux16"); return; + case Iop_QNarrow32Uto16Ux8: vex_printf("QNarrow32Uto16Ux8"); return; + case Iop_QNarrow16Sto8Sx16: vex_printf("QNarrow16Sto8Sx16"); return; + case Iop_QNarrow32Sto16Sx8: vex_printf("QNarrow32Sto16Sx8"); return; case Iop_Shorten16x8: vex_printf("Shorten16x8"); return; case Iop_Shorten32x4: vex_printf("Shorten32x4"); return; case Iop_Shorten64x2: vex_printf("Shorten64x2"); return; @@ -2052,8 +2052,8 @@ void typeOfPrimop ( IROp op, case Iop_QAdd32Ux2: case Iop_QAdd64Ux1: case Iop_PwAdd8x8: case Iop_PwAdd16x4: case Iop_PwAdd32x2: case Iop_PwAdd32Fx2: - case Iop_QNarrow32Sx2: - case Iop_QNarrow16Sx4: case Iop_QNarrow16Ux4: + case Iop_QNarrow32Sto16Sx4: + case Iop_QNarrow16Sto8Sx8: case Iop_QNarrow16Sto8Ux8: case Iop_Sub8x8: case Iop_Sub16x4: case Iop_Sub32x2: case Iop_QSub8Sx8: case Iop_QSub16Sx4: case Iop_QSub32Sx2: case Iop_QSub64Sx1: @@ -2418,8 +2418,9 @@ void typeOfPrimop ( IROp op, case Iop_Sar8x16: case Iop_Sar16x8: case Iop_Sar32x4: case Iop_Sar64x2: case Iop_Sal8x16: case Iop_Sal16x8: case Iop_Sal32x4: case Iop_Sal64x2: case Iop_Rol8x16: case Iop_Rol16x8: case Iop_Rol32x4: - case Iop_QNarrow16Ux8: case Iop_QNarrow32Ux4: - case Iop_QNarrow16Sx8: case Iop_QNarrow32Sx4: + case Iop_QNarrow16Sto8Ux16: + case Iop_QNarrow16Sto8Sx16: case Iop_QNarrow32Sto16Sx8: + case Iop_QNarrow32Uto16Ux8: case Iop_Narrow16x8: case Iop_Narrow32x4: case Iop_InterleaveHI8x16: case Iop_InterleaveHI16x8: case Iop_InterleaveHI32x4: case Iop_InterleaveHI64x2: diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h index 1479cd7382..49497b168e 100644 --- a/VEX/pub/libvex_ir.h +++ b/VEX/pub/libvex_ir.h @@ -896,9 +896,27 @@ typedef 
Iop_QSalN8x8, Iop_QSalN16x4, Iop_QSalN32x2, Iop_QSalN64x1, /* NARROWING -- narrow 2xI64 into 1xI64, hi half from left arg */ - Iop_QNarrow16Ux4, - Iop_QNarrow16Sx4, - Iop_QNarrow32Sx2, + /* For saturated narrowing, I believe there are 4 variants of + the basic arithmetic operation, depending on the signedness + of argument and result. Here are examples that exemplify + what I mean: + + QNarrow16Uto8U ( UShort x ) if (x >u 255) x = 255; + return x[7:0]; + + QNarrow16Sto8S ( Short x ) if (x <s -128) x = -128; + if (x >s 127) x = 127; + return x[7:0]; + + QNarrow16Uto8S ( UShort x ) if (x >u 127) x = 127; + return x[7:0]; + + QNarrow16Sto8U ( Short x ) if (x <s 0) x = 0; + if (x >s 255) x = 255; + return x[7:0]; + */ + Iop_QNarrow16Sto8Ux8, + Iop_QNarrow16Sto8Sx8, Iop_QNarrow32Sto16Sx4, /* INTERLEAVING */ /* Interleave lanes from low or high halves of @@ -1176,9 +1194,10 @@ typedef Iop_QSalN8x16, Iop_QSalN16x8, Iop_QSalN32x4, Iop_QSalN64x2, /* NARROWING -- narrow 2xV128 into 1xV128, hi half from left arg */ - /* Note: the 16{U,S} and 32{U,S} are the pre-narrow lane widths. */ - Iop_QNarrow16Ux8, Iop_QNarrow32Ux4, - Iop_QNarrow16Sx8, Iop_QNarrow32Sx4, + /* See comments above w.r.t. U vs S issues in saturated narrowing. 
*/ + Iop_QNarrow16Sto8Ux16, + Iop_QNarrow16Sto8Sx16, Iop_QNarrow32Sto16Sx8, + Iop_QNarrow16Uto8Ux16, Iop_QNarrow32Uto16Ux8, Iop_Narrow16x8, Iop_Narrow32x4, /* Shortening V128->I64, lo half from each element */ Iop_Shorten16x8, Iop_Shorten32x4, Iop_Shorten64x2, diff --git a/VEX/test_main.c b/VEX/test_main.c index f1336aa632..231c75f24d 100644 --- a/VEX/test_main.c +++ b/VEX/test_main.c @@ -1531,9 +1531,9 @@ IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op, IRAtom *at1, *at2, *at3; IRAtom* (*pcast)( MCEnv*, IRAtom* ); switch (narrow_op) { - case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break; - case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break; - case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break; + case Iop_QNarrow32Sto16Sx8: pcast = mkPCast32x4; break; + case Iop_QNarrow16Sto8Sx16: pcast = mkPCast16x8; break; + case Iop_QNarrow16Sto8Ux16: pcast = mkPCast16x8; break; default: VG_(tool_panic)("vectorNarrowV128"); } tl_assert(isShadowAtom(mce,vatom1)); @@ -1671,9 +1671,9 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce, case Iop_QAdd64Sx2: return binary64Ix2(mce, vatom1, vatom2); - case Iop_QNarrow32Sx4: - case Iop_QNarrow16Sx8: - case Iop_QNarrow16Ux8: + case Iop_QNarrow32Sto16Sx8: + case Iop_QNarrow16Sto8Sx16: + case Iop_QNarrow16Sto8Ux16: return vectorNarrowV128(mce, op, vatom1, vatom2); case Iop_Sub64Fx2: