Move sel16x4_0/1/2/3 to VEX/priv/guest_generic_helpers.h.
Move amd64g_calculate_sse_phminposuw from guest_amd64_helpers.c
to guest_generic_helpers.h and rename it to
g_calculate_sse_phminposuw so that both x86 and amd64 can use it.
Move the test_PHMINPOSUW test function to sse4-common.h and update
none/tests/x86/sse4-x86.c to call it, so the instruction is tested on x86.
BZ: https://bugs.kde.org/show_bug.cgi?id=518217
519574 valgrind 3.27 "--fair-sched=yes" does not work
519613 Valgrind incorrectly unpacks the result of sys_port (port_getn)
on error, leading to a ~60s wallclock time delay on every call
+518217 Add PHMINPOSUW SSE4.1 support for x86
520482 Advertise POPCNT on x86 via CPUID
520753 Advertise LZCNT via CPUID for x86 (32-bit) clients
520856 unhandled instruction bytes: 0x2E 0xFF 0x14 0x85
extern ULong amd64g_calculate_mmx_pmaddwd ( ULong, ULong );
extern ULong amd64g_calculate_mmx_psadbw ( ULong, ULong );
-extern ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi );
-
extern ULong amd64g_calc_crc32b ( ULong crcIn, ULong b );
extern ULong amd64g_calc_crc32w ( ULong crcIn, ULong w );
extern ULong amd64g_calc_crc32l ( ULong crcIn, ULong l );
return (((ULong)w1) << 32) | ((ULong)w0);
}
-static inline UShort sel16x4_3 ( ULong w64 ) {
- UInt hi32 = toUInt(w64 >> 32);
- return toUShort(hi32 >> 16);
-}
-static inline UShort sel16x4_2 ( ULong w64 ) {
- UInt hi32 = toUInt(w64 >> 32);
- return toUShort(hi32);
-}
-static inline UShort sel16x4_1 ( ULong w64 ) {
- UInt lo32 = toUInt(w64);
- return toUShort(lo32 >> 16);
-}
-static inline UShort sel16x4_0 ( ULong w64 ) {
- UInt lo32 = toUInt(w64);
- return toUShort(lo32);
-}
-
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
{
return (ULong)t;
}
-/* CALLED FROM GENERATED CODE: CLEAN HELPER */
-ULong amd64g_calculate_sse_phminposuw ( ULong sLo, ULong sHi )
-{
- UShort t, min;
- UInt idx;
- t = sel16x4_0(sLo); if (True) { min = t; idx = 0; }
- t = sel16x4_1(sLo); if (t < min) { min = t; idx = 1; }
- t = sel16x4_2(sLo); if (t < min) { min = t; idx = 2; }
- t = sel16x4_3(sLo); if (t < min) { min = t; idx = 3; }
- t = sel16x4_0(sHi); if (t < min) { min = t; idx = 4; }
- t = sel16x4_1(sHi); if (t < min) { min = t; idx = 5; }
- t = sel16x4_2(sHi); if (t < min) { min = t; idx = 6; }
- t = sel16x4_3(sHi); if (t < min) { min = t; idx = 7; }
- return ((ULong)(idx << 16)) | ((ULong)min);
-}
-
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calc_crc32b ( ULong crcIn, ULong b )
{
assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
assign( dLo, mkIRExprCCall(
Ity_I64, 0/*regparms*/,
- "amd64g_calculate_sse_phminposuw",
- &amd64g_calculate_sse_phminposuw,
+ "g_calculate_sse_phminposuw",
+ &g_calculate_sse_phminposuw,
mkIRExprVec_2( mkexpr(sLo), mkexpr(sHi) )
));
(isAvx ? putYMMRegLoAndZU : putXMMReg)
UInt lo32 = toUInt(w64);
return toUChar(lo32 >> 0);
}
+static inline UShort sel16x4_3 ( ULong w64 ) { /* 16-bit lane 3 of w64 */
+ UInt hi32 = toUInt(w64 >> 32); /* upper half of w64 */
+ return toUShort(hi32 >> 16); /* bits 63:48 of w64, assuming toUInt/toUShort truncate -- TODO confirm */
+}
+static inline UShort sel16x4_2 ( ULong w64 ) { /* 16-bit lane 2 of w64 */
+ UInt hi32 = toUInt(w64 >> 32); /* upper half of w64 */
+ return toUShort(hi32); /* bits 47:32 of w64, assuming toUInt/toUShort truncate -- TODO confirm */
+}
+static inline UShort sel16x4_1 ( ULong w64 ) { /* 16-bit lane 1 of w64 */
+ UInt lo32 = toUInt(w64); /* lower half of w64, assuming toUInt truncates -- TODO confirm */
+ return toUShort(lo32 >> 16); /* bits 31:16 of w64 */
+}
+static inline UShort sel16x4_0 ( ULong w64 ) { /* 16-bit lane 0 of w64 */
+ UInt lo32 = toUInt(w64); /* lower half of w64, assuming toUInt truncates -- TODO confirm */
+ return toUShort(lo32); /* bits 15:0 of w64, assuming toUShort truncates -- TODO confirm */
+}
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER
+   Scans the eight 16-bit lanes of sHi:sLo for the smallest value.
+   Lanes 0..3 are taken from sLo via sel16x4_0..3 and lanes 4..7 from
+   sHi via sel16x4_0..3, in that order.
+   Returns the minimum in bits 15:0 and its lane number (0..7) in
+   bits 18:16; every other bit of the result is zero.
+   Each comparison is a strict '<', so when several lanes hold the
+   minimum the lowest-numbered one is reported.
+   NOTE(review): this is 'static inline' in a header yet callers take
+   its address to pass to mkIRExprCCall, so each including file
+   presumably ends up with its own out-of-line copy -- confirm that
+   is intended. */
+static inline ULong g_calculate_sse_phminposuw ( ULong sLo, ULong sHi )
+{
+ UShort t, min;
+ UInt idx;
+ t = sel16x4_0(sLo); if (True) { min = t; idx = 0; } /* lane 0 seeds min/idx unconditionally */
+ t = sel16x4_1(sLo); if (t < min) { min = t; idx = 1; }
+ t = sel16x4_2(sLo); if (t < min) { min = t; idx = 2; }
+ t = sel16x4_3(sLo); if (t < min) { min = t; idx = 3; }
+ t = sel16x4_0(sHi); if (t < min) { min = t; idx = 4; } /* lanes 4..7 come from the high half */
+ t = sel16x4_1(sHi); if (t < min) { min = t; idx = 5; }
+ t = sel16x4_2(sHi); if (t < min) { min = t; idx = 6; }
+ t = sel16x4_3(sHi); if (t < min) { min = t; idx = 7; }
+ return ((ULong)(idx << 16)) | ((ULong)min); /* index above the 16-bit minimum */
+}
static inline ULong sad_8x4 ( ULong xx, ULong yy )
{
return (((ULong)w1) << 32) | ((ULong)w0);
}
-static inline UShort sel16x4_3 ( ULong w64 ) {
- UInt hi32 = toUInt(w64 >> 32);
- return toUShort(hi32 >> 16);
-}
-static inline UShort sel16x4_2 ( ULong w64 ) {
- UInt hi32 = toUInt(w64 >> 32);
- return toUShort(hi32);
-}
-static inline UShort sel16x4_1 ( ULong w64 ) {
- UInt lo32 = toUInt(w64);
- return toUShort(lo32 >> 16);
-}
-static inline UShort sel16x4_0 ( ULong w64 ) {
- UInt lo32 = toUInt(w64);
- return toUShort(lo32);
-}
-
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong x86g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
{
goto decode_success;
}
+ /* 66 0F 38 41 /r = PHMINPOSUW xmm1, xmm2/m128
+ Packed Horizontal Word Minimum from xmm2/m128 to xmm1.
+ The 128-bit source is split into its high and low 64-bit halves,
+ which are passed to the g_calculate_sse_phminposuw helper; the
+ helper's 64-bit result is converted with Iop_64UtoV128 and
+ written to the destination register. */
+ if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x41) {
+ modrm = insn[3]; /* modrm follows the three opcode bytes */
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+ UInt rG = gregOfRM(modrm); /* destination register */
+ if (epartIsReg(modrm)) {
+ UInt rE = eregOfRM(modrm); /* register source */
+ assign( sV, getXMMReg(rE) );
+ delta += 1 + 3; /* one modrm byte + three opcode bytes */
+ DIP("phminposuw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
+ } else {
+ addr = disAMode ( &alen, sorb, delta + 3, dis_buf ); /* amode decoding starts after the opcode bytes */
+ gen_SEGV_if_not_16_aligned(addr); /* presumably faults on a misaligned operand, per the helper's name -- confirm */
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += alen + 3; /* alen as reported by disAMode + three opcode bytes */
+ DIP("phminposuw %s,%s\n", dis_buf, nameXMMReg(rG));
+ }
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+ assign( dLo, mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,
+ "g_calculate_sse_phminposuw",
+ &g_calculate_sse_phminposuw,
+ mkIRExprVec_2( mkexpr(sLo), mkexpr(sHi) ) /* helper takes (sLo, sHi) in that order */
+ ));
+ putXMMReg(rG, unop(Iop_64UtoV128, mkexpr(dLo)));
+ goto decode_success;
+ }
/* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
}
-void test_PHMINPOSUW ( void )
-{
- V128 src, dst;
- Int i;
- for (i = 0; i < 20; i++) {
- randV128(&src);
- randV128(&dst);
- DO_mandr_r("phminposuw", src, dst);
- }
- memset(src, 0x55, sizeof(src));
- memset(dst, 0xAA, sizeof(dst));
- DO_mandr_r("phminposuw", src, dst);
-}
-
-
void test_PMOVSXBW ( void )
{
V128 src, dst;
}
}
+static inline void test_PHMINPOSUW ( void ) /* exercises phminposuw through DO_mandr_r */
+{
+ V128 src, dst;
+ Int i;
+ for (i = 0; i < 20; i++) { /* 20 rounds with fresh random src and dst */
+ randV128(&src);
+ randV128(&dst);
+ DO_mandr_r("phminposuw", src, dst);
+ }
+ memset(src, 0x55, sizeof(src)); /* fixed pattern: every byte of src is 0x55, so all lanes are equal (assumes V128 is an array type -- TODO confirm) */
+ memset(dst, 0xAA, sizeof(dst)); /* dst pre-filled with 0xAA bytes */
+ DO_mandr_r("phminposuw", src, dst); /* one final run on the fixed pattern */
+}
+
#endif /* __SSE4_COMMON_H */
test_PACKUSDW();
test_POPCNTL_x86();
test_POPCNTW_x86();
+ test_PHMINPOSUW();
return 0;
}
m popcntw 19ee5c8a 8ad7294a 8ad70007 00000000
r popcntw 4996f18a 0e8d1e69 0e8d0008 00000000
m popcntw 717a2ae4 e71459b3 e7140007 00000000
+r phminposuw 5d4b2274ebe468fb5486cc410e1801e6 da300f480d5521c756acf4ab592c745f 000000000000000000000000000001e6
+m phminposuw 5d4b2274ebe468fb5486cc410e1801e6 da300f480d5521c756acf4ab592c745f 000000000000000000000000000001e6
+r phminposuw 4eb131f511ead55ba535ac12f939f9f0 7548f5f7fc86fa5b0decf3c4036f990f 000000000000000000000000000511ea
+m phminposuw 4eb131f511ead55ba535ac12f939f9f0 7548f5f7fc86fa5b0decf3c4036f990f 000000000000000000000000000511ea
+r phminposuw e9cdab2bb169e9ce39f9a86d69613a92 277c83d0f436d919b5e68b1cfefea5af 000000000000000000000000000339f9
+m phminposuw e9cdab2bb169e9ce39f9a86d69613a92 277c83d0f436d919b5e68b1cfefea5af 000000000000000000000000000339f9
+r phminposuw 8aed8c8368ede101ec9d3c3e779a80f9 4e19b64191f1faac2a6639a167e65368 00000000000000000000000000023c3e
+m phminposuw 8aed8c8368ede101ec9d3c3e779a80f9 4e19b64191f1faac2a6639a167e65368 00000000000000000000000000023c3e
+r phminposuw 8d5bd168d203f9a09aece47241f38651 456b8bb56e4498c148387a3e59326169 000000000000000000000000000141f3
+m phminposuw 8d5bd168d203f9a09aece47241f38651 456b8bb56e4498c148387a3e59326169 000000000000000000000000000141f3
+r phminposuw 4f64754589366d561fb31cf3e37509c5 67befd9728bbf004eb28c9dff1ee88dc 000000000000000000000000000009c5
+m phminposuw 4f64754589366d561fb31cf3e37509c5 67befd9728bbf004eb28c9dff1ee88dc 000000000000000000000000000009c5
+r phminposuw 2a5474882c1378d057bf61ae782ec582 125e08555ae03e20ee01a2704a2787ed 00000000000000000000000000072a54
+m phminposuw 2a5474882c1378d057bf61ae782ec582 125e08555ae03e20ee01a2704a2787ed 00000000000000000000000000072a54
+r phminposuw 7b77ca9c542457b91dd92e901c2a76b3 a097a85aa041bdc22f9182dd81e819c9 00000000000000000000000000011c2a
+m phminposuw 7b77ca9c542457b91dd92e901c2a76b3 a097a85aa041bdc22f9182dd81e819c9 00000000000000000000000000011c2a
+r phminposuw 9e1873eda0f846be4fd0ff83ec73d785 6fb5da129769aa9589a2e312b13df99c 000000000000000000000000000446be
+m phminposuw 9e1873eda0f846be4fd0ff83ec73d785 6fb5da129769aa9589a2e312b13df99c 000000000000000000000000000446be
+r phminposuw ef856ce7a918818bc86f50750417a524 d90399e9dae54146d80143fbf733e392 00000000000000000000000000010417
+m phminposuw ef856ce7a918818bc86f50750417a524 d90399e9dae54146d80143fbf733e392 00000000000000000000000000010417
+r phminposuw c908b0f60d1243cb64819d517f229cbb 3bcee14c0640be80f87a1d846ed595d5 00000000000000000000000000050d12
+m phminposuw c908b0f60d1243cb64819d517f229cbb 3bcee14c0640be80f87a1d846ed595d5 00000000000000000000000000050d12
+r phminposuw 8aee3b876871c92bffd46303799e7777 f262aea5b7065cf0c4d8ee9a332fc994 00000000000000000000000000063b87
+m phminposuw 8aee3b876871c92bffd46303799e7777 f262aea5b7065cf0c4d8ee9a332fc994 00000000000000000000000000063b87
+r phminposuw 8d83090455c14f5674321d770f99f383 580bfd6287c458421ae93027614e3cf9 00000000000000000000000000060904
+m phminposuw 8d83090455c14f5674321d770f99f383 580bfd6287c458421ae93027614e3cf9 00000000000000000000000000060904
+r phminposuw 2e1317db718f10f9a16847995c1fcc0c ca15c8ed1505ee21d5776119143daa31 000000000000000000000000000410f9
+m phminposuw 2e1317db718f10f9a16847995c1fcc0c ca15c8ed1505ee21d5776119143daa31 000000000000000000000000000410f9
+r phminposuw c9ea6076576649c061425d557d3abd3d a5cc0db4fa55593ad150fc5b6908ce67 000000000000000000000000000449c0
+m phminposuw c9ea6076576649c061425d557d3abd3d a5cc0db4fa55593ad150fc5b6908ce67 000000000000000000000000000449c0
+r phminposuw ba53e043a3d33656908bdc978df88444 437bc721d541d638e93e7dd97cbc65c8 00000000000000000000000000043656
+m phminposuw ba53e043a3d33656908bdc978df88444 437bc721d541d638e93e7dd97cbc65c8 00000000000000000000000000043656
+r phminposuw 5c9c93adf261126709113f4ca965db4a 016ff3a23f54a1c8fb0e617f68642b80 00000000000000000000000000030911
+m phminposuw 5c9c93adf261126709113f4ca965db4a 016ff3a23f54a1c8fb0e617f68642b80 00000000000000000000000000030911
+r phminposuw 0d0f7520e09c1ba1aa9e025eec8c7e7e 3bf48ba2d61af696e18c2239490ddcba 0000000000000000000000000002025e
+m phminposuw 0d0f7520e09c1ba1aa9e025eec8c7e7e 3bf48ba2d61af696e18c2239490ddcba 0000000000000000000000000002025e
+r phminposuw 28f9830807118bae4dffa1bb73792b09 4d568d8d3620104d79833ef33cc133a2 00000000000000000000000000050711
+m phminposuw 28f9830807118bae4dffa1bb73792b09 4d568d8d3620104d79833ef33cc133a2 00000000000000000000000000050711
+r phminposuw 08a6b8d1064a9e3ad001994e59389c1a 94e1f4cffaf12c9a9dc130995c8eed66 0000000000000000000000000005064a
+m phminposuw 08a6b8d1064a9e3ad001994e59389c1a 94e1f4cffaf12c9a9dc130995c8eed66 0000000000000000000000000005064a
+r phminposuw 55555555555555555555555555555555 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 00000000000000000000000000005555
+m phminposuw 55555555555555555555555555555555 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 00000000000000000000000000005555