From: Alexandra Hájková Date: Wed, 27 May 2026 15:24:34 +0000 (-0400) Subject: Add SSE4.1 PHMINPOSUW for x86 32-bit X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;ds=inline;p=thirdparty%2Fvalgrind.git Add SSE4.1 PHMINPOSUW for x86 32-bit Move sel16x4_0/1/2/3 from guest_amd64_helpers.c and guest_x86_helpers.c to VEX/priv/guest_generic_helpers.h. Move amd64g_calculate_sse_phminposuw from guest_amd64_helpers.c to guest_generic_helpers.h and rename it to g_calculate_sse_phminposuw so both x86 and amd64 can use it. Move test_PHMINPOSUW from none/tests/amd64/sse4-64.c to none/tests/sse4-common.h, call it from none/tests/x86/sse4-x86.c to test the instruction, and add the expected output to none/tests/x86/sse4-x86.stdout.exp. BZ: https://bugs.kde.org/show_bug.cgi?id=518217 --- diff --git a/NEWS b/NEWS index 5bf36331f..5d79c0ad5 100644 --- a/NEWS +++ b/NEWS @@ -37,6 +37,7 @@ are not entered into bugzilla tend to get forgotten about or ignored. 519574 valgrind 3.27 "--fair-sched=yes" does not work 519613 Valgrind incorrectly unpacks the result of sys_port (port_getn) on error, leading to a ~60s wallclock time delay on every call +518217 Add PHMINPOSUW SSE4.1 support for x86 520482 Advertise POPCNT on x86 via CPUID 520753 Advertise LZCNT via CPUID for x86 (32-bit) clients 520856 unhandled instruction bytes: 0x2E 0xFF 0x14 0x85 diff --git a/VEX/priv/guest_amd64_defs.h b/VEX/priv/guest_amd64_defs.h index 0937a8607..c135eaa53 100644 --- a/VEX/priv/guest_amd64_defs.h +++ b/VEX/priv/guest_amd64_defs.h @@ -139,8 +139,6 @@ extern void amd64g_dirtyhelper_FNSAVES ( VexGuestAMD64State*, HWord ); extern ULong amd64g_calculate_mmx_pmaddwd ( ULong, ULong ); extern ULong amd64g_calculate_mmx_psadbw ( ULong, ULong ); -extern ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi ); - extern ULong amd64g_calc_crc32b ( ULong crcIn, ULong b ); extern ULong amd64g_calc_crc32w ( ULong crcIn, ULong w ); extern ULong amd64g_calc_crc32l ( ULong crcIn, ULong l ); diff --git a/VEX/priv/guest_amd64_helpers.c b/VEX/priv/guest_amd64_helpers.c index 69446a413..7cf94323e 100644 --- a/VEX/priv/guest_amd64_helpers.c +++ 
b/VEX/priv/guest_amd64_helpers.c @@ -4041,23 +4041,6 @@ static inline ULong mk32x2 ( UInt w1, UInt w0 ) { return (((ULong)w1) << 32) | ((ULong)w0); } -static inline UShort sel16x4_3 ( ULong w64 ) { - UInt hi32 = toUInt(w64 >> 32); - return toUShort(hi32 >> 16); -} -static inline UShort sel16x4_2 ( ULong w64 ) { - UInt hi32 = toUInt(w64 >> 32); - return toUShort(hi32); -} -static inline UShort sel16x4_1 ( ULong w64 ) { - UInt lo32 = toUInt(w64); - return toUShort(lo32 >> 16); -} -static inline UShort sel16x4_0 ( ULong w64 ) { - UInt lo32 = toUInt(w64); - return toUShort(lo32); -} - /* CALLED FROM GENERATED CODE: CLEAN HELPER */ ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy ) { @@ -4086,22 +4069,6 @@ ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy ) return (ULong)t; } -/* CALLED FROM GENERATED CODE: CLEAN HELPER */ -ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi ) -{ - UShort t, min; - UInt idx; - t = sel16x4_0(sLo); if (True) { min = t; idx = 0; } - t = sel16x4_1(sLo); if (t < min) { min = t; idx = 1; } - t = sel16x4_2(sLo); if (t < min) { min = t; idx = 2; } - t = sel16x4_3(sLo); if (t < min) { min = t; idx = 3; } - t = sel16x4_0(sHi); if (t < min) { min = t; idx = 4; } - t = sel16x4_1(sHi); if (t < min) { min = t; idx = 5; } - t = sel16x4_2(sHi); if (t < min) { min = t; idx = 6; } - t = sel16x4_3(sHi); if (t < min) { min = t; idx = 7; } - return ((ULong)(idx << 16)) | ((ULong)min); -} - /* CALLED FROM GENERATED CODE: CLEAN HELPER */ ULong amd64g_calc_crc32b ( ULong crcIn, ULong b ) { diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c index c30bd173d..387f6594e 100644 --- a/VEX/priv/guest_amd64_toIR.c +++ b/VEX/priv/guest_amd64_toIR.c @@ -17667,8 +17667,8 @@ static Long dis_PHMINPOSUW_128 ( const VexAbiInfo* vbi, Prefix pfx, assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); assign( dLo, mkIRExprCCall( Ity_I64, 0/*regparms*/, - "amd64g_calculate_sse_phminposuw", - &amd64g_calculate_sse_phminposuw, + 
"g_calculate_sse_phminposuw", + &g_calculate_sse_phminposuw, mkIRExprVec_2( mkexpr(sLo), mkexpr(sHi) ) )); (isAvx ? putYMMRegLoAndZU : putXMMReg) diff --git a/VEX/priv/guest_generic_helpers.h b/VEX/priv/guest_generic_helpers.h index 76d04c289..3584b7c2d 100644 --- a/VEX/priv/guest_generic_helpers.h +++ b/VEX/priv/guest_generic_helpers.h @@ -72,6 +72,38 @@ static inline UChar sel8x8_0 ( ULong w64 ) { UInt lo32 = toUInt(w64); return toUChar(lo32 >> 0); } +static inline UShort sel16x4_3 ( ULong w64 ) { + UInt hi32 = toUInt(w64 >> 32); + return toUShort(hi32 >> 16); +} +static inline UShort sel16x4_2 ( ULong w64 ) { + UInt hi32 = toUInt(w64 >> 32); + return toUShort(hi32); +} +static inline UShort sel16x4_1 ( ULong w64 ) { + UInt lo32 = toUInt(w64); + return toUShort(lo32 >> 16); +} +static inline UShort sel16x4_0 ( ULong w64 ) { + UInt lo32 = toUInt(w64); + return toUShort(lo32); +} + +/* CALLED FROM GENERATED CODE: CLEAN HELPER */ +static inline ULong g_calculate_sse_phminposuw ( ULong sLo, ULong sHi ) +{ + UShort t, min; + UInt idx; + t = sel16x4_0(sLo); if (True) { min = t; idx = 0; } + t = sel16x4_1(sLo); if (t < min) { min = t; idx = 1; } + t = sel16x4_2(sLo); if (t < min) { min = t; idx = 2; } + t = sel16x4_3(sLo); if (t < min) { min = t; idx = 3; } + t = sel16x4_0(sHi); if (t < min) { min = t; idx = 4; } + t = sel16x4_1(sHi); if (t < min) { min = t; idx = 5; } + t = sel16x4_2(sHi); if (t < min) { min = t; idx = 6; } + t = sel16x4_3(sHi); if (t < min) { min = t; idx = 7; } + return ((ULong)(idx << 16)) | ((ULong)min); +} static inline ULong sad_8x4 ( ULong xx, ULong yy ) { diff --git a/VEX/priv/guest_x86_helpers.c b/VEX/priv/guest_x86_helpers.c index 548bec5a9..51bb5d0f5 100644 --- a/VEX/priv/guest_x86_helpers.c +++ b/VEX/priv/guest_x86_helpers.c @@ -2661,23 +2661,6 @@ static inline ULong mk32x2 ( UInt w1, UInt w0 ) { return (((ULong)w1) << 32) | ((ULong)w0); } -static inline UShort sel16x4_3 ( ULong w64 ) { - UInt hi32 = toUInt(w64 >> 32); - return 
toUShort(hi32 >> 16); -} -static inline UShort sel16x4_2 ( ULong w64 ) { - UInt hi32 = toUInt(w64 >> 32); - return toUShort(hi32); -} -static inline UShort sel16x4_1 ( ULong w64 ) { - UInt lo32 = toUInt(w64); - return toUShort(lo32 >> 16); -} -static inline UShort sel16x4_0 ( ULong w64 ) { - UInt lo32 = toUInt(w64); - return toUShort(lo32); -} - /* CALLED FROM GENERATED CODE: CLEAN HELPER */ ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy ) { diff --git a/VEX/priv/guest_x86_toIR.c b/VEX/priv/guest_x86_toIR.c index dc90656c7..dfba4600a 100644 --- a/VEX/priv/guest_x86_toIR.c +++ b/VEX/priv/guest_x86_toIR.c @@ -13537,6 +13537,38 @@ DisResult disInstr_X86_WRK ( goto decode_success; } + /* 66 0F 38 41 /r = PHMINPOSUW xmm1, xmm2/m128 + Packed Horizontal Word Minimum from xmm2/m128 to xmm1 */ + if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x41) { + modrm = insn[3]; + IRTemp sV = newTemp(Ity_V128); + IRTemp sHi = newTemp(Ity_I64); + IRTemp sLo = newTemp(Ity_I64); + IRTemp dLo = newTemp(Ity_I64); + UInt rG = gregOfRM(modrm); + if (epartIsReg(modrm)) { + UInt rE = eregOfRM(modrm); + assign( sV, getXMMReg(rE) ); + delta += 1 + 3; + DIP("phminposuw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG)); + } else { + addr = disAMode ( &alen, sorb, delta + 3, dis_buf ); + gen_SEGV_if_not_16_aligned(addr); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += alen + 3; + DIP("phminposuw %s,%s\n", dis_buf, nameXMMReg(rG)); + } + assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); + assign( dLo, mkIRExprCCall( + Ity_I64, 0/*regparms*/, + "g_calculate_sse_phminposuw", + &g_calculate_sse_phminposuw, + mkIRExprVec_2( mkexpr(sLo), mkexpr(sHi) ) + )); + putXMMReg(rG, unop(Iop_64UtoV128, mkexpr(dLo))); + goto decode_success; + } /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1 diff --git a/none/tests/amd64/sse4-64.c b/none/tests/amd64/sse4-64.c index cc3be6901..43e26b548 
100644 --- a/none/tests/amd64/sse4-64.c +++ b/none/tests/amd64/sse4-64.c @@ -1133,21 +1133,6 @@ void test_EXTRACTPS ( void ) } -void test_PHMINPOSUW ( void ) -{ - V128 src, dst; - Int i; - for (i = 0; i < 20; i++) { - randV128(&src); - randV128(&dst); - DO_mandr_r("phminposuw", src, dst); - } - memset(src, 0x55, sizeof(src)); - memset(dst, 0xAA, sizeof(dst)); - DO_mandr_r("phminposuw", src, dst); -} - - void test_PMOVSXBW ( void ) { V128 src, dst; diff --git a/none/tests/sse4-common.h b/none/tests/sse4-common.h index f031d7178..7c0203934 100644 --- a/none/tests/sse4-common.h +++ b/none/tests/sse4-common.h @@ -1723,4 +1723,18 @@ static inline void test_PACKUSDW ( void ) } } +static inline void test_PHMINPOSUW ( void ) +{ + V128 src, dst; + Int i; + for (i = 0; i < 20; i++) { + randV128(&src); + randV128(&dst); + DO_mandr_r("phminposuw", src, dst); + } + memset(src, 0x55, sizeof(src)); + memset(dst, 0xAA, sizeof(dst)); + DO_mandr_r("phminposuw", src, dst); +} + #endif /* __SSE4_COMMON_H */ diff --git a/none/tests/x86/sse4-x86.c b/none/tests/x86/sse4-x86.c index f25eaf1b4..94c8c478a 100644 --- a/none/tests/x86/sse4-x86.c +++ b/none/tests/x86/sse4-x86.c @@ -264,6 +264,7 @@ int main(void) test_PACKUSDW(); test_POPCNTL_x86(); test_POPCNTW_x86(); + test_PHMINPOSUW(); return 0; } diff --git a/none/tests/x86/sse4-x86.stdout.exp b/none/tests/x86/sse4-x86.stdout.exp index 3ab7f0517..5fb75fc2c 100644 --- a/none/tests/x86/sse4-x86.stdout.exp +++ b/none/tests/x86/sse4-x86.stdout.exp @@ -2824,3 +2824,45 @@ r popcntw ecfbb851 8d72310a 8d720007 00000000 m popcntw 19ee5c8a 8ad7294a 8ad70007 00000000 r popcntw 4996f18a 0e8d1e69 0e8d0008 00000000 m popcntw 717a2ae4 e71459b3 e7140007 00000000 +r phminposuw 5d4b2274ebe468fb5486cc410e1801e6 da300f480d5521c756acf4ab592c745f 000000000000000000000000000001e6 +m phminposuw 5d4b2274ebe468fb5486cc410e1801e6 da300f480d5521c756acf4ab592c745f 000000000000000000000000000001e6 +r phminposuw 4eb131f511ead55ba535ac12f939f9f0 
7548f5f7fc86fa5b0decf3c4036f990f 000000000000000000000000000511ea +m phminposuw 4eb131f511ead55ba535ac12f939f9f0 7548f5f7fc86fa5b0decf3c4036f990f 000000000000000000000000000511ea +r phminposuw e9cdab2bb169e9ce39f9a86d69613a92 277c83d0f436d919b5e68b1cfefea5af 000000000000000000000000000339f9 +m phminposuw e9cdab2bb169e9ce39f9a86d69613a92 277c83d0f436d919b5e68b1cfefea5af 000000000000000000000000000339f9 +r phminposuw 8aed8c8368ede101ec9d3c3e779a80f9 4e19b64191f1faac2a6639a167e65368 00000000000000000000000000023c3e +m phminposuw 8aed8c8368ede101ec9d3c3e779a80f9 4e19b64191f1faac2a6639a167e65368 00000000000000000000000000023c3e +r phminposuw 8d5bd168d203f9a09aece47241f38651 456b8bb56e4498c148387a3e59326169 000000000000000000000000000141f3 +m phminposuw 8d5bd168d203f9a09aece47241f38651 456b8bb56e4498c148387a3e59326169 000000000000000000000000000141f3 +r phminposuw 4f64754589366d561fb31cf3e37509c5 67befd9728bbf004eb28c9dff1ee88dc 000000000000000000000000000009c5 +m phminposuw 4f64754589366d561fb31cf3e37509c5 67befd9728bbf004eb28c9dff1ee88dc 000000000000000000000000000009c5 +r phminposuw 2a5474882c1378d057bf61ae782ec582 125e08555ae03e20ee01a2704a2787ed 00000000000000000000000000072a54 +m phminposuw 2a5474882c1378d057bf61ae782ec582 125e08555ae03e20ee01a2704a2787ed 00000000000000000000000000072a54 +r phminposuw 7b77ca9c542457b91dd92e901c2a76b3 a097a85aa041bdc22f9182dd81e819c9 00000000000000000000000000011c2a +m phminposuw 7b77ca9c542457b91dd92e901c2a76b3 a097a85aa041bdc22f9182dd81e819c9 00000000000000000000000000011c2a +r phminposuw 9e1873eda0f846be4fd0ff83ec73d785 6fb5da129769aa9589a2e312b13df99c 000000000000000000000000000446be +m phminposuw 9e1873eda0f846be4fd0ff83ec73d785 6fb5da129769aa9589a2e312b13df99c 000000000000000000000000000446be +r phminposuw ef856ce7a918818bc86f50750417a524 d90399e9dae54146d80143fbf733e392 00000000000000000000000000010417 +m phminposuw ef856ce7a918818bc86f50750417a524 d90399e9dae54146d80143fbf733e392 00000000000000000000000000010417 +r 
phminposuw c908b0f60d1243cb64819d517f229cbb 3bcee14c0640be80f87a1d846ed595d5 00000000000000000000000000050d12 +m phminposuw c908b0f60d1243cb64819d517f229cbb 3bcee14c0640be80f87a1d846ed595d5 00000000000000000000000000050d12 +r phminposuw 8aee3b876871c92bffd46303799e7777 f262aea5b7065cf0c4d8ee9a332fc994 00000000000000000000000000063b87 +m phminposuw 8aee3b876871c92bffd46303799e7777 f262aea5b7065cf0c4d8ee9a332fc994 00000000000000000000000000063b87 +r phminposuw 8d83090455c14f5674321d770f99f383 580bfd6287c458421ae93027614e3cf9 00000000000000000000000000060904 +m phminposuw 8d83090455c14f5674321d770f99f383 580bfd6287c458421ae93027614e3cf9 00000000000000000000000000060904 +r phminposuw 2e1317db718f10f9a16847995c1fcc0c ca15c8ed1505ee21d5776119143daa31 000000000000000000000000000410f9 +m phminposuw 2e1317db718f10f9a16847995c1fcc0c ca15c8ed1505ee21d5776119143daa31 000000000000000000000000000410f9 +r phminposuw c9ea6076576649c061425d557d3abd3d a5cc0db4fa55593ad150fc5b6908ce67 000000000000000000000000000449c0 +m phminposuw c9ea6076576649c061425d557d3abd3d a5cc0db4fa55593ad150fc5b6908ce67 000000000000000000000000000449c0 +r phminposuw ba53e043a3d33656908bdc978df88444 437bc721d541d638e93e7dd97cbc65c8 00000000000000000000000000043656 +m phminposuw ba53e043a3d33656908bdc978df88444 437bc721d541d638e93e7dd97cbc65c8 00000000000000000000000000043656 +r phminposuw 5c9c93adf261126709113f4ca965db4a 016ff3a23f54a1c8fb0e617f68642b80 00000000000000000000000000030911 +m phminposuw 5c9c93adf261126709113f4ca965db4a 016ff3a23f54a1c8fb0e617f68642b80 00000000000000000000000000030911 +r phminposuw 0d0f7520e09c1ba1aa9e025eec8c7e7e 3bf48ba2d61af696e18c2239490ddcba 0000000000000000000000000002025e +m phminposuw 0d0f7520e09c1ba1aa9e025eec8c7e7e 3bf48ba2d61af696e18c2239490ddcba 0000000000000000000000000002025e +r phminposuw 28f9830807118bae4dffa1bb73792b09 4d568d8d3620104d79833ef33cc133a2 00000000000000000000000000050711 +m phminposuw 28f9830807118bae4dffa1bb73792b09 4d568d8d3620104d79833ef33cc133a2 
00000000000000000000000000050711 +r phminposuw 08a6b8d1064a9e3ad001994e59389c1a 94e1f4cffaf12c9a9dc130995c8eed66 0000000000000000000000000005064a +m phminposuw 08a6b8d1064a9e3ad001994e59389c1a 94e1f4cffaf12c9a9dc130995c8eed66 0000000000000000000000000005064a +r phminposuw 55555555555555555555555555555555 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 00000000000000000000000000005555 +m phminposuw 55555555555555555555555555555555 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 00000000000000000000000000005555