From: Alexandra Hájková Date: Wed, 17 Dec 2025 11:59:19 +0000 (-0500) Subject: Add SSE4.1 PMULLD instruction for x86 32 bit X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c3339bae0e25b65a65e31c625563bfdf6a7953bf;p=thirdparty%2Fvalgrind.git Add SSE4.1 PMULLD instruction for x86 32 bit Support pmulld (packed multiply 32-bit doubleword integers) instruction in guest_x86_toIR.c and host_x86_isel.c. Add test function to sse4-common.h and update none/tests/x86/sse4-x86.c to test the instruction. BZ: https://bugs.kde.org/show_bug.cgi?id=513475 --- diff --git a/NEWS b/NEWS index 5650083a1..4a2593eaf 100644 --- a/NEWS +++ b/NEWS @@ -54,6 +54,7 @@ are not entered into bugzilla tend to get forgotten about or ignored. 513257 Add missing syswraps for lsm_list_modules 513522 m_libcassert.c: 'ordered comparison of pointer with integer zero' compiler warning +513475 Add SSE4.1 PMULLD instruction for x86 32 bit To see details of a given bug, visit https://bugs.kde.org/show_bug.cgi?id=XXXXXX diff --git a/VEX/priv/guest_x86_toIR.c b/VEX/priv/guest_x86_toIR.c index af4edf21e..bd4ccd54b 100644 --- a/VEX/priv/guest_x86_toIR.c +++ b/VEX/priv/guest_x86_toIR.c @@ -13069,6 +13069,38 @@ DisResult disInstr_X86_WRK ( goto decode_success; } + /* 66 0F 38 40 /r - PMULLD xmm1, xmm2/m128 + 32x4 integer multiply from xmm2/m128 to xmm1 */ + if (sz == 2 + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x40) { + + modrm = insn[3]; + + IRTemp argL = newTemp(Ity_V128); + IRTemp argR = newTemp(Ity_V128); + + if (epartIsReg(modrm)) { + assign( argL, getXMMReg( eregOfRM(modrm) ) ); + delta += 3+1; + DIP( "pmulld %s,%s\n", + nameXMMReg( eregOfRM(modrm) ), + nameXMMReg( gregOfRM(modrm) ) ); + } else { + addr = disAMode( &alen, sorb, delta+3, dis_buf ); + assign( argL, loadLE( Ity_V128, mkexpr(addr) )); + delta += 3+alen; + DIP( "pmulld %s,%s\n", + dis_buf, nameXMMReg( gregOfRM(modrm) ) ); + } + + assign(argR, getXMMReg( gregOfRM(modrm) )); + + putXMMReg( gregOfRM(modrm), + binop( Iop_Mul32x4, mkexpr(argL), mkexpr(argR)) ); + + goto decode_success; + } + /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1 (Partial implementation only -- only deal with cases where the rounding mode is specified directly by the immediate byte.) diff --git a/VEX/priv/host_x86_isel.c b/VEX/priv/host_x86_isel.c index 22a5702c0..773235cda 100644 --- a/VEX/priv/host_x86_isel.c +++ b/VEX/priv/host_x86_isel.c @@ -3891,6 +3891,9 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e ) case Iop_Min32Ux4: fn = (HWord)h_generic_calc_Min32Ux4; goto do_SseAssistedBinary; + case Iop_Mul32x4: + fn = (HWord)h_generic_calc_Mul32x4; + goto do_SseAssistedBinary; do_SseAssistedBinary: { /* As with the amd64 case (where this is copied from) we generate pretty bad code. */ diff --git a/none/tests/amd64/sse4-64.c b/none/tests/amd64/sse4-64.c index f4207b858..a4614016e 100644 --- a/none/tests/amd64/sse4-64.c +++ b/none/tests/amd64/sse4-64.c @@ -1637,18 +1637,6 @@ void test_PMULDQ ( void ) } -void test_PMULLD ( void ) -{ - V128 src, dst; - Int i; - for (i = 0; i < 10; i++) { - randV128(&src); - randV128(&dst); - DO_mandr_r("pmulld", src, dst); - } -} - - void test_POPCNTQ ( void ) { ULong block[4]; diff --git a/none/tests/sse4-common.h b/none/tests/sse4-common.h index 1191d4bb0..5576c1f73 100644 --- a/none/tests/sse4-common.h +++ b/none/tests/sse4-common.h @@ -340,4 +340,15 @@ static inline void test_PMINUW ( void ) } } +static inline void test_PMULLD ( void ) +{ + V128 src, dst; + Int i; + for (i = 0; i < 10; i++) { + randV128(&src); + randV128(&dst); + DO_mandr_r("pmulld", src, dst); + } +} + #endif /* __SSE4_COMMON_H */ diff --git a/none/tests/x86/sse4-x86.c b/none/tests/x86/sse4-x86.c index 664235775..f65e6467d 100644 --- a/none/tests/x86/sse4-x86.c +++ b/none/tests/x86/sse4-x86.c @@ -112,6 +112,7 @@ int main(void) test_PMINSD(); test_PMINUD(); test_PMINUW(); + test_PMULLD(); return 0; } diff --git a/none/tests/x86/sse4-x86.stdout.exp b/none/tests/x86/sse4-x86.stdout.exp index 515c22766..de3810a0e 100644 --- a/none/tests/x86/sse4-x86.stdout.exp +++ b/none/tests/x86/sse4-x86.stdout.exp @@ -166,3 +166,23 @@ r pminuw 9f043af6a1aed58f1ee978efa4b054d2 76f140aa4182b4e706a17746411ab40c 7 m pminuw 9f043af6a1aed58f1ee978efa4b054d2 76f140aa4182b4e706a17746411ab40c 76f13af64182b4e706a17746411a54d2 r pminuw 5e58aa8b4c88ae0d34fa174f9ce927c4 51f2275707e17ae4b3fd9698098ef5b0 51f2275707e17ae434fa174f098e27c4 m pminuw 5e58aa8b4c88ae0d34fa174f9ce927c4 51f2275707e17ae4b3fd9698098ef5b0 51f2275707e17ae434fa174f098e27c4 +r pmulld 2ad7482a960fb2b27014160ebbdb47e4 a7837c83faf3cb1d360794fec60222d6 869c457ee570642a5f0ff9e410a26098 +m pmulld 2ad7482a960fb2b27014160ebbdb47e4 a7837c83faf3cb1d360794fec60222d6 869c457ee570642a5f0ff9e410a26098 +r pmulld 61cd123e19cf1e2bb001f1161e946f5c d5f13a9ab645e140698bec649583f5aa 0ddd054c30e255c077f47498e538ff18 +m pmulld 61cd123e19cf1e2bb001f1161e946f5c d5f13a9ab645e140698bec649583f5aa 0ddd054c30e255c077f47498e538ff18 +r pmulld 5e86033374552e23ce8e2455e0205c58 37885d08d662faf92a541ab7911c2b5a 4b71a098414e0e0bf5309ac32c833ef0 +m pmulld 5e86033374552e23ce8e2455e0205c58 37885d08d662faf92a541ab7911c2b5a 4b71a098414e0e0bf5309ac32c833ef0 +r pmulld 7c4e1775412d1d47a8872cb61d8aca05 2993e139f7d64ff4532f9ae1d7da8010 85300e0d0562d0aca71dc7f678f12050 +m pmulld 7c4e1775412d1d47a8872cb61d8aca05 2993e139f7d64ff4532f9ae1d7da8010 85300e0d0562d0aca71dc7f678f12050 +r pmulld 19714a711ce1284318b88425f2de758f 0760c299b42e1fdcc2e9e9cf82c7aff8 107f1f896d68b6943d2586eb5a07a388 +m pmulld 19714a711ce1284318b88425f2de758f 0760c299b42e1fdcc2e9e9cf82c7aff8 107f1f896d68b6943d2586eb5a07a388 +r pmulld 8f3a9991a2ff8bc2fceca88e7b281821 2d39fd95a9f5a45d514c816eaff2763f f528ae65bf080d7ae6c7fb04282f261f +m pmulld 8f3a9991a2ff8bc2fceca88e7b281821 2d39fd95a9f5a45d514c816eaff2763f f528ae65bf080d7ae6c7fb04282f261f +r pmulld 3cf6fe426e1281712ef114ddd37570e8 f76b8d9773b81b24de24e0a879648e11 f35052ee3a791ee4558b1108f20d2f68 +m pmulld 3cf6fe426e1281712ef114ddd37570e8 f76b8d9773b81b24de24e0a879648e11 f35052ee3a791ee4558b1108f20d2f68 +r pmulld 7af177f11da748fc8b9145fe16d0390f c1426e0dae01c0dd433f816bfd2bb699 9ceea53da123018c3d7f3f2a1387c3f7 +m pmulld 7af177f11da748fc8b9145fe16d0390f c1426e0dae01c0dd433f816bfd2bb699 9ceea53da123018c3d7f3f2a1387c3f7 +r pmulld a77700084a491a0ef099b6dd61462ec3 e70a9c61f55fce335d68e1a25652a804 306be308b0b974caedc5f4da103eb30c +m pmulld a77700084a491a0ef099b6dd61462ec3 e70a9c61f55fce335d68e1a25652a804 306be308b0b974caedc5f4da103eb30c +r pmulld 1dd493f59184345437d5e366d0e20c30 c50f1401e45b82d3086a7a39a1e6217d edbeb7f57c65c93cb53a3db645122370 +m pmulld 1dd493f59184345437d5e366d0e20c30 c50f1401e45b82d3086a7a39a1e6217d edbeb7f57c65c93cb53a3db645122370