From: Tom Hughes Date: Mon, 13 May 2024 18:40:19 +0000 (+0200) Subject: Bug 276780 - An instruction in fftw (Fast Fourier Transform) is unhandled by valgrind... X-Git-Tag: VALGRIND_3_24_0~140 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=92b6fa13d4ecf075c434d0316d493383edf7aa6d;p=thirdparty%2Fvalgrind.git Bug 276780 - An instruction in fftw (Fast Fourier Transform) is unhandled by valgrind: vex x86->IR: unhandled instruction bytes: 0x66 0xF 0x3A 0x22 --- diff --git a/.gitignore b/.gitignore index 9d90d1c65..232059dae 100644 --- a/.gitignore +++ b/.gitignore @@ -2312,6 +2312,7 @@ /none/tests/x86/sigcontext /none/tests/x86/smc1 /none/tests/x86/ssse3_misaligned +/none/tests/x86/sse4-x86 /none/tests/x86/x86locked /none/tests/x86/x87trigOOR /none/tests/x86/xadd diff --git a/NEWS b/NEWS index f8be2521a..9dbb03216 100644 --- a/NEWS +++ b/NEWS @@ -23,6 +23,9 @@ bugzilla (https://bugs.kde.org/enter_bug.cgi?product=valgrind) rather than mailing the developers (or mailing lists) directly -- bugs that are not entered into bugzilla tend to get forgotten about or ignored. 
+276780 An instruction in fftw (Fast Fourier Transform) is unhandled by + valgrind: vex x86->IR: unhandled instruction bytes: + 0x66 0xF 0x3A 0x22 377966 arm64 unhandled instruction dc zva392146 aarch64: unhandled instruction 0xD5380001 (MRS rT, midr_el1) 412377 SIGILL on cache flushes on arm64 diff --git a/VEX/priv/guest_x86_toIR.c b/VEX/priv/guest_x86_toIR.c index 3b6efb387..7b31bd769 100644 --- a/VEX/priv/guest_x86_toIR.c +++ b/VEX/priv/guest_x86_toIR.c @@ -12941,6 +12941,62 @@ DisResult disInstr_X86_WRK ( /* --- start of the SSE4 decoder --- */ /* ---------------------------------------------------- */ + /* 66 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8 + Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */ + if ( sz == 2 + && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x22 ) { + + Int imm8_10; + IRTemp src_elems = newTemp(Ity_I32); + IRTemp src_vec = newTemp(Ity_V128); + IRTemp z32 = newTemp(Ity_I32); + + modrm = insn[3]; + + if ( epartIsReg( modrm ) ) { + imm8_10 = (Int)(insn[3+1] & 3); + assign( src_elems, getIReg( 4, eregOfRM(modrm) ) ); + delta += 3+1+1; + DIP( "pinsrd $%d, %s,%s\n", imm8_10, + nameIReg( 4, eregOfRM(modrm) ), + nameXMMReg( gregOfRM(modrm) ) ); + } else { + addr = disAMode( &alen, sorb, delta+3, dis_buf ); + imm8_10 = (Int)(insn[3+alen] & 3); + assign( src_elems, loadLE( Ity_I32, mkexpr(addr) ) ); + delta += 3+alen+1; + DIP( "pinsrd $%d, %s,%s\n", + imm8_10, dis_buf, nameXMMReg( gregOfRM(modrm) ) ); + } + + assign(z32, mkU32(0)); + + UShort mask = 0; + switch (imm8_10) { + case 3: mask = 0x0FFF; + assign(src_vec, mk128from32s(src_elems, z32, z32, z32)); + break; + case 2: mask = 0xF0FF; + assign(src_vec, mk128from32s(z32, src_elems, z32, z32)); + break; + case 1: mask = 0xFF0F; + assign(src_vec, mk128from32s(z32, z32, src_elems, z32)); + break; + case 0: mask = 0xFFF0; + assign(src_vec, mk128from32s(z32, z32, z32, src_elems)); + break; + default: vassert(0); + } + + putXMMReg( gregOfRM(modrm), + binop( Iop_OrV128, 
mkexpr(src_vec), + binop( Iop_AndV128, + getXMMReg( gregOfRM(modrm) ), + mkV128(mask) ) ) ); + + goto decode_success; + } + /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1 (Partial implementation only -- only deal with cases where the rounding mode is specified directly by the immediate byte.) diff --git a/none/tests/x86/Makefile.am b/none/tests/x86/Makefile.am index dbae86571..0606bb17c 100644 --- a/none/tests/x86/Makefile.am +++ b/none/tests/x86/Makefile.am @@ -75,6 +75,7 @@ EXTRA_DIST = \ smc1.stderr.exp smc1.stdout.exp smc1.vgtest \ ssse3_misaligned.stderr.exp ssse3_misaligned.stdout.exp \ ssse3_misaligned.vgtest ssse3_misaligned.c \ + sse4-x86.stdout.exp sse4-x86.stderr.exp sse4-x86.vgtest \ x86locked.vgtest x86locked.stdout.exp x86locked.stderr.exp \ x87trigOOR.vgtest x87trigOOR.stdout.exp x87trigOOR.stderr.exp \ yield.stderr.exp yield.stdout.exp yield.disabled \ @@ -119,6 +120,9 @@ check_PROGRAMS = \ if BUILD_SSSE3_TESTS check_PROGRAMS += ssse3_misaligned endif +if BUILD_SSE42_TESTS + check_PROGRAMS += sse4-x86 +endif if BUILD_LZCNT_TESTS check_PROGRAMS += lzcnt32 endif diff --git a/none/tests/x86/sse4-x86.c b/none/tests/x86/sse4-x86.c new file mode 100644 index 000000000..0fec4bda6 --- /dev/null +++ b/none/tests/x86/sse4-x86.c @@ -0,0 +1,271 @@ + +/* A program to test SSE4.1/SSE4.2 instructions. + Copied from amd64 version. 
+*/ + +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include "tests/malloc.h" +#include <string.h> + + +typedef unsigned char V128[16]; +typedef unsigned int UInt; +typedef signed int Int; +typedef unsigned char UChar; +typedef unsigned long long int ULong; + +typedef unsigned char Bool; +#define False ((Bool)0) +#define True ((Bool)1) + + +typedef + struct { + V128 arg1; + V128 arg2; + V128 res; + } + RRArgs; + +typedef + struct { + V128 arg1; + V128 res; + } + RMArgs; + + +static UChar randUChar ( void ) +{ + static UInt seed = 80021; + seed = 1103515245 * seed + 12345; + return (seed >> 17) & 0xFF; +} + + +static ULong randULong ( void ) +{ + Int i; + ULong r = 0; + for (i = 0; i < 8; i++) { + r = (r << 8) | (ULong)(0xFF & randUChar()); + } + return r; +} + + +static void showV128 ( V128* v ) +{ + Int i; + for (i = 15; i >= 0; i--) + printf("%02x", (Int)(*v)[i]); +} + + +static void showIGVV( char* rOrM, char* op, Int imm, + ULong src64, V128* dst, V128* res ) +{ + printf("%s %10s $%d ", rOrM, op, imm); + printf("%016llx", src64); + printf(" "); + showV128(dst); + printf(" "); + showV128(res); + printf("\n"); +} + +static V128 fives = { 0x55,0x55,0x55,0x55, 0x55,0x55,0x55,0x55, + 0x55,0x55,0x55,0x55, 0x55,0x55,0x55,0x55 }; + +static V128 zeroes = { 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00 }; + +#define DO_imm_r_r(_opname, _imm, _src, _dst) \ + { \ + V128 _tmp; \ + __asm__ __volatile__( \ + "movupd (%0), %%xmm2" "\n\t" \ + "movupd (%1), %%xmm11" "\n\t" \ + _opname " $" #_imm ", %%xmm2, %%xmm11" "\n\t" \ + "movupd %%xmm11, (%2)" "\n" \ + : /*out*/ : /*in*/ "r"(&(_src)), "r"(&(_dst)), "r"(&(_tmp)) \ + : "cc", "memory", "xmm2", "xmm11" \ + ); \ + RRArgs rra; \ + memcpy(&rra.arg1, &(_src), sizeof(V128)); \ + memcpy(&rra.arg2, &(_dst), sizeof(V128)); \ + memcpy(&rra.res, &(_tmp), sizeof(V128)); \ + showIAA("r", (_opname), (_imm), &rra, &AllMask); \ + } + +#define DO_imm_m_r(_opname, _imm, _src, _dst) \ + { \ + V128 _tmp; \ + V128* _srcM = 
memalign16(sizeof(V128)); \ + memcpy(_srcM, &(_src), sizeof(V128)); \ + __asm__ __volatile__( \ + "movupd (%1), %%xmm11" "\n\t" \ + _opname " $" #_imm ", (%0), %%xmm11" "\n\t" \ + "movupd %%xmm11, (%2)" "\n" \ + : /*out*/ : /*in*/ "r"(_srcM), "r"(&(_dst)), "r"(&(_tmp)) \ + : "cc", "memory", "xmm11" \ + ); \ + RRArgs rra; \ + memcpy(&rra.arg1, &(_src), sizeof(V128)); \ + memcpy(&rra.arg2, &(_dst), sizeof(V128)); \ + memcpy(&rra.res, &(_tmp), sizeof(V128)); \ + showIAA("m", (_opname), (_imm), &rra, &AllMask); \ + free(_srcM); \ + } + +#define DO_imm_mandr_r(_opname, _imm, _src, _dst) \ + DO_imm_r_r( _opname, _imm, _src, _dst ) \ + DO_imm_m_r( _opname, _imm, _src, _dst ) + +#define DO_r_r(_opname, _src, _dst) \ + { \ + V128 _tmp; \ + __asm__ __volatile__( \ + "movupd (%0), %%xmm2" "\n\t" \ + "movupd (%1), %%xmm11" "\n\t" \ + _opname " %%xmm2, %%xmm11" "\n\t" \ + "movupd %%xmm11, (%2)" "\n" \ + : /*out*/ : /*in*/ "r"(&(_src)), "r"(&(_dst)), "r"(&(_tmp)) \ + : "cc", "memory", "xmm2", "xmm11" \ + ); \ + RRArgs rra; \ + memcpy(&rra.arg1, &(_src), sizeof(V128)); \ + memcpy(&rra.arg2, &(_dst), sizeof(V128)); \ + memcpy(&rra.res, &(_tmp), sizeof(V128)); \ + showAA("r", (_opname), &rra, &AllMask); \ + } + +#define DO_m_r(_opname, _src, _dst) \ + { \ + V128 _tmp; \ + V128* _srcM = memalign16(sizeof(V128)); \ + memcpy(_srcM, &(_src), sizeof(V128)); \ + __asm__ __volatile__( \ + "movupd (%1), %%xmm11" "\n\t" \ + _opname " (%0), %%xmm11" "\n\t" \ + "movupd %%xmm11, (%2)" "\n" \ + : /*out*/ : /*in*/ "r"(_srcM), "r"(&(_dst)), "r"(&(_tmp)) \ + : "cc", "memory", "xmm11" \ + ); \ + RRArgs rra; \ + memcpy(&rra.arg1, &(_src), sizeof(V128)); \ + memcpy(&rra.arg2, &(_dst), sizeof(V128)); \ + memcpy(&rra.res, &(_tmp), sizeof(V128)); \ + showAA("m", (_opname), &rra, &AllMask); \ + free(_srcM); \ + } + +#define DO_mandr_r(_opname, _src, _dst) \ + DO_r_r(_opname, _src, _dst) \ + DO_m_r(_opname, _src, _dst) + +#define DO_imm_r_to_rscalar(_opname, _imm, _src) \ + { \ + ULong _scbefore = 
0x5555555555555555ULL; \ + ULong _scafter = 0xAAAAAAAAAAAAAAAAULL; \ + /* This assumes that gcc won't make any of %0, %1, %2 */ \ + /* be r11. That should be ensured (cough, cough) */ \ + /* by declaring r11 to be clobbered. */ \ + __asm__ __volatile__( \ + "movupd (%0), %%xmm2" "\n\t" \ + "movq (%1), %%r11" "\n\t" \ + _opname " $" #_imm ", %%xmm2, %%r11" "\n\t" \ + "movq %%r11, (%2)" "\n" \ + : /*out*/ \ + : /*in*/ "r"(&(_src)), "r"(&(_scbefore)), "r"(&(_scafter)) \ + : "cc", "memory", "xmm2", "r11" \ + ); \ + showIAG("r", (_opname), (_imm), &(_src), (_scbefore), (_scafter)); \ + } + +#define DO_imm_r_to_mscalar(_opname, _imm, _src) \ + { \ + ULong _scbefore = 0x5555555555555555ULL; \ + ULong _scafter = _scbefore; \ + __asm__ __volatile__( \ + "movupd (%0), %%xmm2" "\n\t" \ + _opname " $" #_imm ", %%xmm2, (%1)" "\n\t" \ + : /*out*/ \ + : /*in*/ "r"(&(_src)), "r"(&(_scafter)) \ + : "cc", "memory", "xmm2" \ + ); \ + showIAG("m", (_opname), (_imm), &(_src), (_scbefore), (_scafter)); \ + } + +#define DO_imm_r_to_mandrscalar(_opname, _imm, _src ) \ + DO_imm_r_to_rscalar( _opname, _imm, _src ) \ + DO_imm_r_to_mscalar( _opname, _imm, _src ) + + +#define DO_imm_rscalar_to_r(_opname, _imm, _src) \ + { \ + V128 dstv; \ + V128 res; \ + ULong src64 = (ULong)(_src); \ + memcpy(dstv, fives, sizeof(dstv)); \ + memcpy(res, zeroes, sizeof(res)); \ + __asm__ __volatile__( \ + "movupd (%0), %%xmm2" "\n\t" /*dstv*/ \ + "mov (%1), %%eax" "\n\t" /*src64*/ \ + _opname " $" #_imm ", %%eax" ", %%xmm2" "\n\t" \ + "movupd %%xmm2, (%2)" "\n" /*res*/ \ + : /*out*/ \ + : /*in*/ "r"(&dstv), "r"(&src64), "r"(&res) \ + : "cc", "memory", "xmm2", "eax" \ + ); \ + showIGVV("r", (_opname), (_imm), src64, &dstv, &res); \ + } + +#define DO_imm_mscalar_to_r(_opname, _imm, _src) \ + { \ + V128 dstv; \ + V128 res; \ + ULong src64 = (ULong)(_src); \ + memcpy(dstv, fives, sizeof(dstv)); \ + memcpy(res, zeroes, sizeof(res)); \ + __asm__ __volatile__( \ + "movupd (%0), %%xmm2" "\n\t" /*dstv*/ \ + _opname " $" 
#_imm ", (%1), %%xmm2" "\n\t" \ + "movupd %%xmm2, (%2)" "\n" /*res*/ \ + : /*out*/ \ + : /*in*/ "r"(&dstv), "r"(&src64), "r"(&res) \ + : "cc", "memory", "xmm2" \ + ); \ + showIGVV("m", (_opname), (_imm), src64, &dstv, &res); \ + } + +#define DO_imm_mandrscalar_to_r(_opname, _imm, _src ) \ + DO_imm_rscalar_to_r( _opname, _imm, _src ) \ + DO_imm_mscalar_to_r( _opname, _imm, _src ) + + +void test_PINSRD ( void ) +{ + ULong src; + src = randULong(); + DO_imm_mandrscalar_to_r("pinsrd", 0, src); + src = randULong(); + DO_imm_mandrscalar_to_r("pinsrd", 1, src); + src = randULong(); + DO_imm_mandrscalar_to_r("pinsrd", 2, src); + src = randULong(); + DO_imm_mandrscalar_to_r("pinsrd", 3, src); +} + +/* ------------ main ------------ */ + +int main(void) +{ + // ------ SSE 4.1 ------ + test_PINSRD(); + + return 0; +} + diff --git a/none/tests/x86/sse4-x86.stderr.exp b/none/tests/x86/sse4-x86.stderr.exp new file mode 100644 index 000000000..e69de29bb diff --git a/none/tests/x86/sse4-x86.stdout.exp b/none/tests/x86/sse4-x86.stdout.exp new file mode 100644 index 000000000..9642a3756 --- /dev/null +++ b/none/tests/x86/sse4-x86.stdout.exp @@ -0,0 +1,8 @@ +r pinsrd $0 00571784494af298 55555555555555555555555555555555 555555555555555555555555494af298 +m pinsrd $0 00571784494af298 55555555555555555555555555555555 555555555555555555555555494af298 +r pinsrd $1 1ecac9199de37551 55555555555555555555555555555555 55555555555555559de3755155555555 +m pinsrd $1 1ecac9199de37551 55555555555555555555555555555555 55555555555555559de3755155555555 +r pinsrd $2 3bd127afa6e9c369 55555555555555555555555555555555 55555555a6e9c3695555555555555555 +m pinsrd $2 3bd127afa6e9c369 55555555555555555555555555555555 55555555a6e9c3695555555555555555 +r pinsrd $3 0d6a95fac528657d 55555555555555555555555555555555 c528657d555555555555555555555555 +m pinsrd $3 0d6a95fac528657d 55555555555555555555555555555555 c528657d555555555555555555555555 diff --git a/none/tests/x86/sse4-x86.vgtest 
b/none/tests/x86/sse4-x86.vgtest new file mode 100644 index 000000000..77bf5fd44 --- /dev/null +++ b/none/tests/x86/sse4-x86.vgtest @@ -0,0 +1,3 @@ +prereq: test -x sse4-x86 +prog: sse4-x86 +vgopts: -q