From: Mark Wielaard Date: Wed, 18 Feb 2026 16:48:15 +0000 (+0100) Subject: Add MOVNTDQA SSE4.1 support for x86 X-Git-Tag: VALGRIND_3_27_0~4 X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=064e31d4c9b72afed2b238d36219fb697c33e51e;p=thirdparty%2Fvalgrind.git Add MOVNTDQA SSE4.1 support for x86 Add handling of MOVNTDQA to VEX/priv/guest_x86_toIR.c based on the guest_amd64_toIR.c implementation. Move test_MOVNTDQA from none/tests/amd64/sse4-64.c to none/tests/sse4-common.h and add the same test to none/tests/x86/sse4-x86.c with new MOVNTDQA output in stdout.exp. https://bugs.kde.org/show_bug.cgi?id=516225 --- diff --git a/NEWS b/NEWS index f6dfb5d7b..ad90467c5 100644 --- a/NEWS +++ b/NEWS @@ -126,6 +126,7 @@ are not entered into bugzilla tend to get forgotten about or ignored. 515810 Update the LTP version in valgrind testsuite to 20260130 515992 Add FreeBSD /proc virtualisation for cmdline and file 516090 Regression : Linux FreeBSD and Darwin: refactor *at syscall dirfd checks +516225 Add MOVNTDQA SSE4.1 support for x86 516289 illumos lsframe2 regtest fails 516748 Incorrect use of SET_STATUS_Failure for syscall wrappers that return error codes rather than -1 on error diff --git a/VEX/priv/guest_x86_toIR.c b/VEX/priv/guest_x86_toIR.c index 27305c2bd..35677385d 100644 --- a/VEX/priv/guest_x86_toIR.c +++ b/VEX/priv/guest_x86_toIR.c @@ -13150,6 +13150,23 @@ DisResult disInstr_X86_WRK ( goto decode_success; } + /* 66 0F 38 2A /r MOVNTDQA xmm1, m128 + "non-temporal" "streaming" load + Handle like MOVDQA but only memory operand is allowed */ + if ( sz == 2 + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x2A ) { + modrm = insn[3]; + if ( !epartIsReg( modrm ) ) { + addr = disAMode( &alen, sorb, delta+3, dis_buf ); + gen_SEGV_if_not_16_aligned( addr ); + putXMMReg( gregOfRM(modrm), + loadLE(Ity_V128, mkexpr(addr)) ); + DIP("movntdqa %s,%s\n", dis_buf, nameXMMReg(gregOfRM(modrm))); + delta += 3 + alen; + goto decode_success; + } + } + /* 66 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8 Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */ if ( sz == 2 diff --git a/none/tests/amd64/sse4-64.c b/none/tests/amd64/sse4-64.c index c341c282e..65f610a07 100644 --- a/none/tests/amd64/sse4-64.c +++ b/none/tests/amd64/sse4-64.c @@ -2871,18 +2871,6 @@ void test_PTEST ( void ) } } -void test_MOVNTDQA ( void ) -{ - V128 src, dst; - Int i; - for (i = 0; i < 10; i++) { - randV128(&src); - /* make sure the load actually happens */ - randV128(&dst); - DO_m_r("movntdqa", src, dst); - } -} - /* ------------ main ------------ */ int main ( int argc, char** argv ) diff --git a/none/tests/sse4-common.h b/none/tests/sse4-common.h index 5727b65b4..97e11080c 100644 --- a/none/tests/sse4-common.h +++ b/none/tests/sse4-common.h @@ -895,4 +895,16 @@ static inline void test_MPSADBW ( void ) } } +static inline void test_MOVNTDQA ( void ) +{ + V128 src, dst; + Int i; + for (i = 0; i < 10; i++) { + randV128(&src); + /* make sure the load actually happens */ + randV128(&dst); + DO_m_r("movntdqa", src, dst); + } +} + #endif /* __SSE4_COMMON_H */ diff --git a/none/tests/x86/sse4-x86.c b/none/tests/x86/sse4-x86.c index 1ce441339..9996d63e9 100644 --- a/none/tests/x86/sse4-x86.c +++ b/none/tests/x86/sse4-x86.c @@ -165,6 +165,7 @@ int main(void) test_PTEST(); test_PCMPEQQ(); test_MPSADBW(); + test_MOVNTDQA(); return 0; } diff --git a/none/tests/x86/sse4-x86.stdout.exp b/none/tests/x86/sse4-x86.stdout.exp index 3c64dc93f..cf91b1336 100644 --- a/none/tests/x86/sse4-x86.stdout.exp +++ b/none/tests/x86/sse4-x86.stdout.exp @@ -2042,3 +2042,13 @@ r mpsadbw $6 3637c27a144a5b20f8ab9814aff9c5f0 bafd469c03bb81a72d0fa3c734a9306 m mpsadbw $6 3637c27a144a5b20f8ab9814aff9c5f0 bafd469c03bb81a72d0fa3c734a93060 014b01770190012f01370109017f0143 r mpsadbw $7 3637c27a144a5b20f8ab9814aff9c5f0 bafd469c03bb81a72d0fa3c734a93060 0173017500a80125013701bb0157009d m mpsadbw $7 3637c27a144a5b20f8ab9814aff9c5f0 bafd469c03bb81a72d0fa3c734a93060 0173017500a80125013701bb0157009d +m movntdqa 5e28e61e7d9809fed89f25ffb69a16f0 dc31117d86c46bc9c3241e0a49fd7e17 5e28e61e7d9809fed89f25ffb69a16f0 +m movntdqa d1f115970180fe0f9bc76e95e06250a9 b6a224a9b26dfb35eb12d4ad50bc53dc d1f115970180fe0f9bc76e95e06250a9 +m movntdqa e9dd4c503b8c78011defefc04a5c2f46 a49c7d8b21406d977fa6409c64f46bdc e9dd4c503b8c78011defefc04a5c2f46 +m movntdqa 033786b7c84ab17d3be2256e10956ff4 026a179172ccfc9a5caddec3a1b08243 033786b7c84ab17d3be2256e10956ff4 +m movntdqa 7c4dbf374346e632cf6e8a894c18cbde 2c59ee263f9ae6eb5ef02a0e24fd533c 7c4dbf374346e632cf6e8a894c18cbde +m movntdqa ae69f33c480a53cab65d9cff1df10031 7db5feb724386535623ea06909e69bf4 ae69f33c480a53cab65d9cff1df10031 +m movntdqa f6d81f33742433f2cc7dd6bb9c2cca19 53ca44aebd31b5254262bdc16b771596 f6d81f33742433f2cc7dd6bb9c2cca19 +m movntdqa b0e63d866320c355ed98b4a9e8d6e4c1 09e4bb78a8121467db27fc0066bc7f4f b0e63d866320c355ed98b4a9e8d6e4c1 +m movntdqa 39df4ba2b0883fa0f57ab3b51afb0c56 fb4f5f827e66bca6095bd91417c2934b 39df4ba2b0883fa0f57ab3b51afb0c56 +m movntdqa eb0e45f4f7eae27ec0f14ecb50a5fc04 84562c36ddb9ea8ea8c8d0e79a950eb5 eb0e45f4f7eae27ec0f14ecb50a5fc04