extern void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st );
extern void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st );
extern void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st );
+extern void amd64g_dirtyhelper_CPUID_avx2 ( VexGuestAMD64State* st );
extern void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* );
-extern void amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM
- ( VexGuestAMD64State*, HWord );
-extern VexEmNote amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM
- ( VexGuestAMD64State*, HWord );
+extern void amd64g_dirtyhelper_XSAVE_COMPONENT_0
+ ( VexGuestAMD64State* gst, HWord addr );
+extern void amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
+ ( VexGuestAMD64State* gst, HWord addr );
+
+extern VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_0
+ ( VexGuestAMD64State* gst, HWord addr );
+extern VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
+ ( VexGuestAMD64State* gst, HWord addr );
extern ULong amd64g_dirtyhelper_RDTSC ( void );
extern void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* st );
}
-static
-void do_fxsave ( VexGuestAMD64State* gst, HWord addr, Bool save_xmm_regs )
+/*---------------------------------------------------------------*/
+/*--- Supporting functions for XSAVE/FXSAVE. ---*/
+/*---------------------------------------------------------------*/
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (reads guest state, writes guest mem) */
+/* XSAVE component 0 is the x87 FPU state. */
+void amd64g_dirtyhelper_XSAVE_COMPONENT_0
+ ( VexGuestAMD64State* gst, HWord addr )
{
/* Derived from values obtained from
vendor_id : AuthenticAMD
Fpu_State tmp;
UShort* addrS = (UShort*)addr;
UChar* addrC = (UChar*)addr;
- UInt mxcsr;
UShort fp_tags;
UInt summary_tags;
Int r, stno;
UShort *srcS, *dstS;
do_get_x87( gst, (UChar*)&tmp );
- mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );
- /* Now build the proper fxsave image from the x87 image we just
- made. */
+ /* Now build the proper fxsave x87 image from the fsave x87 image
+ we just made. */
addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */
   addrS[1]  = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */
addrS[10] = 0; /* BOGUS */
addrS[11] = 0; /* BOGUS */
- addrS[12] = toUShort(mxcsr); /* MXCSR */
- addrS[13] = toUShort(mxcsr >> 16);
-
- addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
- addrS[15] = 0x0000; /* MXCSR mask (hi16) */
+ /* addrS[13,12] are MXCSR -- not written */
+ /* addrS[15,14] are MXCSR_MASK -- not written */
/* Copy in the FP registers, in ST order. */
for (stno = 0; stno < 8; stno++) {
dstS[6] = 0;
dstS[7] = 0;
}
+}
+
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (reads guest state, writes guest mem) */
+/* XSAVE component 1 is the SSE state. */
+void amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
+ ( VexGuestAMD64State* gst, HWord addr )
+{
+ UShort* addrS = (UShort*)addr;
+ UInt mxcsr;
+
+ /* The only non-register parts of the SSE state are MXCSR and
+ MXCSR_MASK. */
+ mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND );
+
+ addrS[12] = toUShort(mxcsr); /* MXCSR */
+ addrS[13] = toUShort(mxcsr >> 16);
+
+ addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */
+ addrS[15] = 0x0000; /* MXCSR mask (hi16) */
+}
+
+
+/* VISIBLE TO LIBVEX CLIENT */
+/* Do FXSAVE from the supplied VexGuestAMD64State structure and store
+ the result at the given address which represents a buffer of at
+ least 416 bytes.
+
+ This function is not called from generated code. FXSAVE is dealt
+ with by the amd64 front end by calling the XSAVE_COMPONENT_{0,1}
+ functions above plus some in-line IR. This function is merely a
+ convenience function for VEX's users.
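+
+   For example, given a guest state |gst| (sketch):
+
+      UChar buf[416];
+      LibVEX_GuestAMD64_fxsave(&gst, (HWord)&buf[0]);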
+*/
+void LibVEX_GuestAMD64_fxsave ( /*IN*/VexGuestAMD64State* gst,
+ /*OUT*/HWord fp_state )
+{
+ /* Do the x87 part */
+ amd64g_dirtyhelper_XSAVE_COMPONENT_0(gst, fp_state);
+
+ /* And now the SSE part, except for the registers themselves. */
+ amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS(gst, fp_state);
/* That's the first 160 bytes of the image done. */
- if (save_xmm_regs == True) {
- /* Now only %xmm0 .. %xmm15 remain to be copied. If the host is
- big-endian, these need to be byte-swapped. */
- U128 *xmm = (U128 *)(addr + 160);
-
- vassert(host_is_little_endian());
-
-# define COPY_U128(_dst,_src) \
- do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
- _dst[2] = _src[2]; _dst[3] = _src[3]; } \
- while (0)
-
- COPY_U128( xmm[0], gst->guest_YMM0 );
- COPY_U128( xmm[1], gst->guest_YMM1 );
- COPY_U128( xmm[2], gst->guest_YMM2 );
- COPY_U128( xmm[3], gst->guest_YMM3 );
- COPY_U128( xmm[4], gst->guest_YMM4 );
- COPY_U128( xmm[5], gst->guest_YMM5 );
- COPY_U128( xmm[6], gst->guest_YMM6 );
- COPY_U128( xmm[7], gst->guest_YMM7 );
- COPY_U128( xmm[8], gst->guest_YMM8 );
- COPY_U128( xmm[9], gst->guest_YMM9 );
- COPY_U128( xmm[10], gst->guest_YMM10 );
- COPY_U128( xmm[11], gst->guest_YMM11 );
- COPY_U128( xmm[12], gst->guest_YMM12 );
- COPY_U128( xmm[13], gst->guest_YMM13 );
- COPY_U128( xmm[14], gst->guest_YMM14 );
- COPY_U128( xmm[15], gst->guest_YMM15 );
+ /* Now only %xmm0 .. %xmm15 remain to be copied. If the host is
+ big-endian, these need to be byte-swapped. */
+ U128 *xmm = (U128 *)(fp_state + 160);
+ vassert(host_is_little_endian());
+
+# define COPY_U128(_dst,_src) \
+ do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
+ _dst[2] = _src[2]; _dst[3] = _src[3]; } \
+ while (0)
+
+ COPY_U128( xmm[0], gst->guest_YMM0 );
+ COPY_U128( xmm[1], gst->guest_YMM1 );
+ COPY_U128( xmm[2], gst->guest_YMM2 );
+ COPY_U128( xmm[3], gst->guest_YMM3 );
+ COPY_U128( xmm[4], gst->guest_YMM4 );
+ COPY_U128( xmm[5], gst->guest_YMM5 );
+ COPY_U128( xmm[6], gst->guest_YMM6 );
+ COPY_U128( xmm[7], gst->guest_YMM7 );
+ COPY_U128( xmm[8], gst->guest_YMM8 );
+ COPY_U128( xmm[9], gst->guest_YMM9 );
+ COPY_U128( xmm[10], gst->guest_YMM10 );
+ COPY_U128( xmm[11], gst->guest_YMM11 );
+ COPY_U128( xmm[12], gst->guest_YMM12 );
+ COPY_U128( xmm[13], gst->guest_YMM13 );
+ COPY_U128( xmm[14], gst->guest_YMM14 );
+ COPY_U128( xmm[15], gst->guest_YMM15 );
# undef COPY_U128
- } else {
- /* We let the generated IR to copy remaining %xmm0 .. %xmm15, so as to
- make Memcheck's definedness flow for the non-XMM parts independent from
- that of the all the other control and status words in the structure.
- This avoids the false positives shown in #291310. */
- }
}
-static
-VexEmNote do_fxrstor ( VexGuestAMD64State* gst, HWord addr,
- Bool rstor_xmm_regs )
+/*---------------------------------------------------------------*/
+/*--- Supporting functions for XRSTOR/FXRSTOR. ---*/
+/*---------------------------------------------------------------*/
+
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (writes guest state, reads guest mem) */
+VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_0
+ ( VexGuestAMD64State* gst, HWord addr )
{
Fpu_State tmp;
- VexEmNote warnX87 = EmNote_NONE;
- VexEmNote warnXMM = EmNote_NONE;
UShort* addrS = (UShort*)addr;
UChar* addrC = (UChar*)addr;
UShort fp_tags;
Int r, stno, i;
- if (rstor_xmm_regs == True) {
- /* Restore %xmm0 .. %xmm15. If the host is big-endian, these need
- to be byte-swapped. */
- U128 *xmm = (U128 *)(addr + 160);
-
- vassert(host_is_little_endian());
-
-# define COPY_U128(_dst,_src) \
- do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
- _dst[2] = _src[2]; _dst[3] = _src[3]; } \
- while (0)
-
- COPY_U128( gst->guest_YMM0, xmm[0] );
- COPY_U128( gst->guest_YMM1, xmm[1] );
- COPY_U128( gst->guest_YMM2, xmm[2] );
- COPY_U128( gst->guest_YMM3, xmm[3] );
- COPY_U128( gst->guest_YMM4, xmm[4] );
- COPY_U128( gst->guest_YMM5, xmm[5] );
- COPY_U128( gst->guest_YMM6, xmm[6] );
- COPY_U128( gst->guest_YMM7, xmm[7] );
- COPY_U128( gst->guest_YMM8, xmm[8] );
- COPY_U128( gst->guest_YMM9, xmm[9] );
- COPY_U128( gst->guest_YMM10, xmm[10] );
- COPY_U128( gst->guest_YMM11, xmm[11] );
- COPY_U128( gst->guest_YMM12, xmm[12] );
- COPY_U128( gst->guest_YMM13, xmm[13] );
- COPY_U128( gst->guest_YMM14, xmm[14] );
- COPY_U128( gst->guest_YMM15, xmm[15] );
-
-# undef COPY_U128
- } else {
- /* Don't restore %xmm0 .. %xmm15, for the same reasons that
- do_fxsave(save_xmm_regs = False) doesn't save them. See
- comment in that function for details. */
- }
-
/* Copy the x87 registers out of the image, into a temporary
Fpu_State struct. */
for (i = 0; i < 14; i++) tmp.env[i] = 0;
tmp.env[FP_ENV_TAG] = fp_tags;
/* Now write 'tmp' into the guest state. */
- warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );
+ VexEmNote warnX87 = do_put_x87( True/*moveRegs*/, (UChar*)&tmp, gst );
- { UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
- | ((((UInt)addrS[13]) & 0xFFFF) << 16);
- ULong w64 = amd64g_check_ldmxcsr( (ULong)w32 );
+ return warnX87;
+}
- warnXMM = (VexEmNote)(w64 >> 32);
- gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL;
- }
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (writes guest state, reads guest mem) */
+VexEmNote amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
+ ( VexGuestAMD64State* gst, HWord addr )
+{
+ UShort* addrS = (UShort*)addr;
+ UInt w32 = (((UInt)addrS[12]) & 0xFFFF)
+ | ((((UInt)addrS[13]) & 0xFFFF) << 16);
+ ULong w64 = amd64g_check_ldmxcsr( (ULong)w32 );
+
+ VexEmNote warnXMM = (VexEmNote)(w64 >> 32);
+
+ gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL;
+ return warnXMM;
+}
+
+
+/* VISIBLE TO LIBVEX CLIENT */
+/* Do FXRSTOR from the supplied address, writing the values read into
+   the given VexGuestAMD64State structure.
+
+ This function is not called from generated code. FXRSTOR is dealt
+ with by the amd64 front end by calling the XRSTOR_COMPONENT_{0,1}
+ functions above plus some in-line IR. This function is merely a
+ convenience function for VEX's users.
+*/
+VexEmNote LibVEX_GuestAMD64_fxrstor ( /*IN*/HWord fp_state,
+ /*MOD*/VexGuestAMD64State* gst )
+{
+ /* Restore %xmm0 .. %xmm15. If the host is big-endian, these need
+ to be byte-swapped. */
+ U128 *xmm = (U128 *)(fp_state + 160);
+
+ vassert(host_is_little_endian());
+
+# define COPY_U128(_dst,_src) \
+ do { _dst[0] = _src[0]; _dst[1] = _src[1]; \
+ _dst[2] = _src[2]; _dst[3] = _src[3]; } \
+ while (0)
+
+ COPY_U128( gst->guest_YMM0, xmm[0] );
+ COPY_U128( gst->guest_YMM1, xmm[1] );
+ COPY_U128( gst->guest_YMM2, xmm[2] );
+ COPY_U128( gst->guest_YMM3, xmm[3] );
+ COPY_U128( gst->guest_YMM4, xmm[4] );
+ COPY_U128( gst->guest_YMM5, xmm[5] );
+ COPY_U128( gst->guest_YMM6, xmm[6] );
+ COPY_U128( gst->guest_YMM7, xmm[7] );
+ COPY_U128( gst->guest_YMM8, xmm[8] );
+ COPY_U128( gst->guest_YMM9, xmm[9] );
+ COPY_U128( gst->guest_YMM10, xmm[10] );
+ COPY_U128( gst->guest_YMM11, xmm[11] );
+ COPY_U128( gst->guest_YMM12, xmm[12] );
+ COPY_U128( gst->guest_YMM13, xmm[13] );
+ COPY_U128( gst->guest_YMM14, xmm[14] );
+ COPY_U128( gst->guest_YMM15, xmm[15] );
+
+# undef COPY_U128
+
+ VexEmNote warnXMM
+ = amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS(gst, fp_state);
+ VexEmNote warnX87
+ = amd64g_dirtyhelper_XRSTOR_COMPONENT_0(gst, fp_state);
/* Prefer an X87 emwarn over an XMM one, if both exist. */
if (warnX87 != EmNote_NONE)
}
-/* CALLED FROM GENERATED CODE */
-/* DIRTY HELPER (reads guest state, writes guest mem) */
-/* NOTE: only handles 32-bit format (no REX.W on the insn) */
-/* NOTE: does not save XMM registers - see do_fxsave() for details */
-void amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM ( VexGuestAMD64State* gst,
- HWord addr )
-{
- do_fxsave( gst, addr, False );
-}
-
-/* CALLED FROM GENERATED CODE */
-/* DIRTY HELPER (writes guest state, reads guest mem) */
-VexEmNote amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM ( VexGuestAMD64State* gst,
- HWord addr )
-{
- return do_fxrstor( gst, addr, False );
-}
-
+/*---------------------------------------------------------------*/
+/*--- Supporting functions for FSAVE/FRSTOR ---*/
+/*---------------------------------------------------------------*/
/* DIRTY HELPER (writes guest state) */
/* Initialise the x87 FPU state as per 'finit'. */
return ew;
}
-/* VISIBLE TO LIBVEX CLIENT */
-/* Do FXSAVE from the supplied VexGuestAMD64tate structure and store the
- result at the given address which represents a buffer of at least 416
- bytes. Saves also XMM registers. */
-void LibVEX_GuestAMD64_fxsave ( /*IN*/VexGuestAMD64State* gst,
- /*OUT*/HWord fp_state )
-{
- do_fxsave( gst, fp_state, True );
-}
-
-/* VISIBLE TO LIBVEX CLIENT */
-/* Do FXRSTOR from the supplied address and store read values to the given
- VexGuestAMD64State structure. Restores also XMM registers. */
-VexEmNote LibVEX_GuestAMD64_fxrstor ( /*IN*/HWord fp_state,
- /*MOD*/VexGuestAMD64State* gst )
-{
- return do_fxrstor( gst, fp_state, True );
-}
-
/*---------------------------------------------------------------*/
-/*--- Misc integer helpers, including rotates and CPUID. ---*/
+/*--- CPUID helpers. ---*/
/*---------------------------------------------------------------*/
/* Claim to be the following CPU, which is probably representative of
/* Claim to be the following CPU (4 x ...), which is AVX and cx16
capable. Plus (kludge!) it "supports" HTM.
+   Also with the following change: claim that XSaveOpt is not
+   available, so that cpuid(eax=0xD,ecx=1).eax[0] returns 0, whereas it
+   returns 1 on the real CPU.  Consequently, programs that correctly
+   observe these CPUID values should only try to use 3 of the 8
+   XSave-family instructions: XGETBV, XSAVE and XRSTOR.  In particular
+   this avoids having to implement the compacted or optimised
+   save/restore variants.
+
vendor_id : GenuineIntel
cpu family : 6
model : 42
switch (old_ecx) {
case 0x00000000: SET_ABCD(0x00000007, 0x00000340,
0x00000340, 0x00000000); break;
- case 0x00000001: SET_ABCD(0x00000001, 0x00000000,
+ case 0x00000001: SET_ABCD(0x00000000, 0x00000000,
0x00000000, 0x00000000); break;
case 0x00000002: SET_ABCD(0x00000100, 0x00000240,
0x00000000, 0x00000000); break;
}
+/* Claim to be the following CPU (4 x ...), which is AVX2 capable.
+
+   With the following change: claim that XSaveOpt is not available, so
+   that cpuid(eax=0xD,ecx=1).eax[0] returns 0, whereas it returns 1 on
+   the real CPU.  Consequently, programs that correctly observe these
+   CPUID values should only try to use 3 of the 8 XSave-family
+   instructions: XGETBV, XSAVE and XRSTOR.  In particular this avoids
+   having to implement the compacted or optimised save/restore
+   variants; an illustrative probe sketch follows this function.
+
+ vendor_id : GenuineIntel
+ cpu family : 6
+ model : 60
+ model name : Intel(R) Core(TM) i7-4910MQ CPU @ 2.90GHz
+ stepping : 3
+ microcode : 0x1c
+ cpu MHz : 919.957
+ cache size : 8192 KB
+ physical id : 0
+ siblings : 4
+ core id : 3
+ cpu cores : 4
+ apicid : 6
+ initial apicid : 6
+ fpu : yes
+ fpu_exception : yes
+ cpuid level : 13
+ wp : yes
+ flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca
+ cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht
+ tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc
+ arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc
+ aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl
+ vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1
+ sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave
+ avx f16c rdrand lahf_lm abm ida arat epb pln pts dtherm
+ tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust
+ bmi1 avx2 smep bmi2 erms invpcid xsaveopt
+ bugs :
+ bogomips : 5786.68
+ clflush size : 64
+ cache_alignment : 64
+ address sizes : 39 bits physical, 48 bits virtual
+ power management:
+*/
+void amd64g_dirtyhelper_CPUID_avx2 ( VexGuestAMD64State* st )
+{
+# define SET_ABCD(_a,_b,_c,_d) \
+ do { st->guest_RAX = (ULong)(_a); \
+ st->guest_RBX = (ULong)(_b); \
+ st->guest_RCX = (ULong)(_c); \
+ st->guest_RDX = (ULong)(_d); \
+ } while (0)
+
+ UInt old_eax = (UInt)st->guest_RAX;
+ UInt old_ecx = (UInt)st->guest_RCX;
+
+ switch (old_eax) {
+ case 0x00000000:
+ SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
+ break;
+ case 0x00000001:
+ SET_ABCD(0x000306c3, 0x02100800, 0x7ffafbff, 0xbfebfbff);
+ break;
+ case 0x00000002:
+ SET_ABCD(0x76036301, 0x00f0b6ff, 0x00000000, 0x00c10000);
+ break;
+ case 0x00000003:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x00000004:
+ switch (old_ecx) {
+ case 0x00000000: SET_ABCD(0x1c004121, 0x01c0003f,
+ 0x0000003f, 0x00000000); break;
+ case 0x00000001: SET_ABCD(0x1c004122, 0x01c0003f,
+ 0x0000003f, 0x00000000); break;
+ case 0x00000002: SET_ABCD(0x1c004143, 0x01c0003f,
+ 0x000001ff, 0x00000000); break;
+ case 0x00000003: SET_ABCD(0x1c03c163, 0x03c0003f,
+ 0x00001fff, 0x00000006); break;
+ default: SET_ABCD(0x00000000, 0x00000000,
+ 0x00000000, 0x00000000); break;
+ }
+ break;
+ case 0x00000005:
+ SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00042120);
+ break;
+ case 0x00000006:
+ SET_ABCD(0x00000077, 0x00000002, 0x00000009, 0x00000000);
+ break;
+ case 0x00000007:
+ switch (old_ecx) {
+ case 0x00000000: SET_ABCD(0x00000000, 0x000027ab,
+ 0x00000000, 0x00000000); break;
+ default: SET_ABCD(0x00000000, 0x00000000,
+ 0x00000000, 0x00000000); break;
+ }
+ break;
+ case 0x00000008:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x00000009:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x0000000a:
+ SET_ABCD(0x07300803, 0x00000000, 0x00000000, 0x00000603);
+ break;
+ case 0x0000000b:
+ switch (old_ecx) {
+ case 0x00000000: SET_ABCD(0x00000001, 0x00000002,
+ 0x00000100, 0x00000002); break;
+ case 0x00000001: SET_ABCD(0x00000004, 0x00000008,
+ 0x00000201, 0x00000002); break;
+ default: SET_ABCD(0x00000000, 0x00000000,
+ old_ecx, 0x00000002); break;
+ }
+ break;
+ case 0x0000000c:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x0000000d:
+ switch (old_ecx) {
+ case 0x00000000: SET_ABCD(0x00000007, 0x00000340,
+ 0x00000340, 0x00000000); break;
+ case 0x00000001: SET_ABCD(0x00000000, 0x00000000,
+ 0x00000000, 0x00000000); break;
+ case 0x00000002: SET_ABCD(0x00000100, 0x00000240,
+ 0x00000000, 0x00000000); break;
+ default: SET_ABCD(0x00000000, 0x00000000,
+ 0x00000000, 0x00000000); break;
+ }
+ break;
+ case 0x80000000:
+ SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000001:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000021, 0x2c100800);
+ break;
+ case 0x80000002:
+ SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
+ break;
+ case 0x80000003:
+ SET_ABCD(0x37692029, 0x3139342d, 0x20514d30, 0x20555043);
+ break;
+ case 0x80000004:
+ SET_ABCD(0x2e322040, 0x48473039, 0x0000007a, 0x00000000);
+ break;
+ case 0x80000005:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000006:
+ SET_ABCD(0x00000000, 0x00000000, 0x01006040, 0x00000000);
+ break;
+ case 0x80000007:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000100);
+ break;
+ case 0x80000008:
+ SET_ABCD(0x00003027, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ default:
+ SET_ABCD(0x00000007, 0x00000340, 0x00000340, 0x00000000);
+ break;
+ }
+# undef SET_ABCD
+}
+
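+/* Illustrative only: a sketch of how client code could observe the
+   XSaveOpt masking performed by the CPUID helpers above, assuming
+   GCC's <cpuid.h>:
+
+      #include <cpuid.h>
+      unsigned int a, b, c, d;
+      __get_cpuid_count(0xD, 1, &a, &b, &c, &d);
+      // a & 1 is the XSAVEOPT bit: 0 under these helpers, 1 on the
+      // real CPUs they are modelled on.
+*/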
+
+/*---------------------------------------------------------------*/
+/*--- Misc integer helpers, including rotates and crypto. ---*/
+/*---------------------------------------------------------------*/
+
ULong amd64g_calculate_RCR ( ULong arg,
ULong rot_amt,
ULong rflags_in,
vpanic("doScalarWidening(amd64)");
}
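+/* Do a Put of |value| to the guest state field at offset |gstOffB|, but
+   only if |guard| is true at run time; otherwise the field is rewritten
+   with its existing value, so the Put has no effect. */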
+static
+void putGuarded ( Int gstOffB, IRExpr* guard, IRExpr* value )
+{
+ IRType ty = typeOfIRExpr(irsb->tyenv, value);
+ stmt( IRStmt_Put(gstOffB,
+ IRExpr_ITE(guard, value, IRExpr_Get(gstOffB, ty))) );
+}
/*------------------------------------------------------------*/
}
+/* Generate a dirty helper call that initialises the x87 state a la
+ FINIT. If |guard| is NULL, it is done unconditionally. Otherwise
+ |guard| is used as a guarding condition.
+*/
+static void gen_FINIT_SEQUENCE ( IRExpr* guard )
+{
+ /* Uses dirty helper:
+ void amd64g_do_FINIT ( VexGuestAMD64State* ) */
+ IRDirty* d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "amd64g_dirtyhelper_FINIT",
+ &amd64g_dirtyhelper_FINIT,
+ mkIRExprVec_1( IRExpr_BBPTR() )
+ );
+
+ /* declare we're writing guest state */
+ d->nFxState = 5;
+ vex_bzero(&d->fxState, sizeof(d->fxState));
+
+ d->fxState[0].fx = Ifx_Write;
+ d->fxState[0].offset = OFFB_FTOP;
+ d->fxState[0].size = sizeof(UInt);
+
+ d->fxState[1].fx = Ifx_Write;
+ d->fxState[1].offset = OFFB_FPREGS;
+ d->fxState[1].size = 8 * sizeof(ULong);
+
+ d->fxState[2].fx = Ifx_Write;
+ d->fxState[2].offset = OFFB_FPTAGS;
+ d->fxState[2].size = 8 * sizeof(UChar);
+
+ d->fxState[3].fx = Ifx_Write;
+ d->fxState[3].offset = OFFB_FPROUND;
+ d->fxState[3].size = sizeof(ULong);
+
+ d->fxState[4].fx = Ifx_Write;
+ d->fxState[4].offset = OFFB_FC3210;
+ d->fxState[4].size = sizeof(ULong);
+
+ if (guard)
+ d->guard = guard;
+
+ stmt( IRStmt_Dirty(d) );
+}
+
+
/* ------------------------------------------------------- */
/* Given all that stack-mangling junk, we can now go ahead
and describe FP instructions.
break;
case 0xE3: {
- /* Uses dirty helper:
- void amd64g_do_FINIT ( VexGuestAMD64State* ) */
- IRDirty* d = unsafeIRDirty_0_N (
- 0/*regparms*/,
- "amd64g_dirtyhelper_FINIT",
- &amd64g_dirtyhelper_FINIT,
- mkIRExprVec_1( IRExpr_BBPTR() )
- );
-
- /* declare we're writing guest state */
- d->nFxState = 5;
- vex_bzero(&d->fxState, sizeof(d->fxState));
-
- d->fxState[0].fx = Ifx_Write;
- d->fxState[0].offset = OFFB_FTOP;
- d->fxState[0].size = sizeof(UInt);
-
- d->fxState[1].fx = Ifx_Write;
- d->fxState[1].offset = OFFB_FPREGS;
- d->fxState[1].size = 8 * sizeof(ULong);
-
- d->fxState[2].fx = Ifx_Write;
- d->fxState[2].offset = OFFB_FPTAGS;
- d->fxState[2].size = 8 * sizeof(UChar);
-
- d->fxState[3].fx = Ifx_Write;
- d->fxState[3].offset = OFFB_FPROUND;
- d->fxState[3].size = sizeof(ULong);
-
- d->fxState[4].fx = Ifx_Write;
- d->fxState[4].offset = OFFB_FC3210;
- d->fxState[4].size = sizeof(ULong);
-
- stmt( IRStmt_Dirty(d) );
-
+ gen_FINIT_SEQUENCE(NULL/*no guarding condition*/);
DIP("fninit\n");
break;
}
gen_SEGV_if_not_XX_aligned(effective_addr, 32-1);
}
+static void gen_SEGV_if_not_64_aligned ( IRTemp effective_addr ) {
+ gen_SEGV_if_not_XX_aligned(effective_addr, 64-1);
+}
+
/* Helper for deciding whether a given insn (starting at the opcode
byte) may validly be used with a LOCK prefix. The following insns
may be used with LOCK when their destination operand is in memory.
}
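+/* Layout of the XSAVE image as addressed by the sequences below.  This
+   is only a summary of the byte offsets used in the code, not a full
+   description of the architectural format:
+
+        0 ..  23   x87 control/status/tag words, FPU IP/DP (component 0)
+       24 ..  31   MXCSR and MXCSR_MASK (component 1 helper)
+       32 .. 159   x87 register stack, 8 x 16-byte slots (component 0)
+      160 .. 415   %xmm0 .. %xmm15, done with in-line IR, gated by rfbm[1]
+      512 .. 575   XSAVE header; byte 512 holds the low bits of XSTATE_BV
+      576 .. 831   high 128-bit halves of %ymm0 .. %ymm15, gated by rfbm[2]
+*/
+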
+static void gen_XSAVE_SEQUENCE ( IRTemp addr, IRTemp rfbm )
+{
+ /* ------ rfbm[0] gates the x87 state ------ */
+
+ /* Uses dirty helper:
+ void amd64g_do_XSAVE_COMPONENT_0 ( VexGuestAMD64State*, ULong )
+ */
+ IRDirty* d0 = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "amd64g_dirtyhelper_XSAVE_COMPONENT_0",
+ &amd64g_dirtyhelper_XSAVE_COMPONENT_0,
+ mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
+ );
+ d0->guard = binop(Iop_CmpEQ64, binop(Iop_And64, mkexpr(rfbm), mkU64(1)),
+ mkU64(1));
+
+ /* Declare we're writing memory. Really, bytes 24 through 31
+ (MXCSR and MXCSR_MASK) aren't written, but we can't express more
+ than 1 memory area here, so just mark the whole thing as
+ written. */
+ d0->mFx = Ifx_Write;
+ d0->mAddr = mkexpr(addr);
+ d0->mSize = 160;
+
+ /* declare we're reading guest state */
+ d0->nFxState = 5;
+ vex_bzero(&d0->fxState, sizeof(d0->fxState));
+
+ d0->fxState[0].fx = Ifx_Read;
+ d0->fxState[0].offset = OFFB_FTOP;
+ d0->fxState[0].size = sizeof(UInt);
+
+ d0->fxState[1].fx = Ifx_Read;
+ d0->fxState[1].offset = OFFB_FPREGS;
+ d0->fxState[1].size = 8 * sizeof(ULong);
+
+ d0->fxState[2].fx = Ifx_Read;
+ d0->fxState[2].offset = OFFB_FPTAGS;
+ d0->fxState[2].size = 8 * sizeof(UChar);
+
+ d0->fxState[3].fx = Ifx_Read;
+ d0->fxState[3].offset = OFFB_FPROUND;
+ d0->fxState[3].size = sizeof(ULong);
+
+ d0->fxState[4].fx = Ifx_Read;
+ d0->fxState[4].offset = OFFB_FC3210;
+ d0->fxState[4].size = sizeof(ULong);
+
+ stmt( IRStmt_Dirty(d0) );
+
+ /* ------ rfbm[1] gates the SSE state ------ */
+
+ IRTemp rfbm_1 = newTemp(Ity_I64);
+ IRTemp rfbm_1or2 = newTemp(Ity_I64);
+ assign(rfbm_1, binop(Iop_And64, mkexpr(rfbm), mkU64(2)));
+ assign(rfbm_1or2, binop(Iop_And64, mkexpr(rfbm), mkU64(6)));
+
+ IRExpr* guard_1 = binop(Iop_CmpEQ64, mkexpr(rfbm_1), mkU64(2));
+ IRExpr* guard_1or2 = binop(Iop_CmpNE64, mkexpr(rfbm_1or2), mkU64(0));
+
+ /* Uses dirty helper:
+ void amd64g_do_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
+ ( VexGuestAMD64State*, ULong )
+ This creates only MXCSR and MXCSR_MASK. We need to do this if
+ either components 1 (SSE) or 2 (AVX) are requested. Hence the
+ guard condition is a bit more complex.
+ */
+ IRDirty* d1 = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS",
+ &amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS,
+ mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
+ );
+ d1->guard = guard_1or2;
+
+ /* Declare we're writing memory: MXCSR and MXCSR_MASK. Note that
+      the code for rfbm[0] just above claims a write of 0 .. 159, so
+      this duplicates it.  But at least it correctly connects 24 .. 31 to
+ the MXCSR guest state representation (SSEROUND field). */
+ d1->mFx = Ifx_Write;
+ d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24));
+ d1->mSize = 8;
+
+ /* declare we're reading guest state */
+ d1->nFxState = 1;
+ vex_bzero(&d1->fxState, sizeof(d1->fxState));
+
+ d1->fxState[0].fx = Ifx_Read;
+ d1->fxState[0].offset = OFFB_SSEROUND;
+ d1->fxState[0].size = sizeof(ULong);
+
+ /* Call the helper. This creates MXCSR and MXCSR_MASK but nothing
+ else. We do the actual register array, XMM[0..15], separately,
+ in order that any undefinedness in the XMM registers is tracked
+ separately by Memcheck and does not "infect" the in-memory
+ shadow for the other parts of the image. */
+ stmt( IRStmt_Dirty(d1) );
+
+ /* And now the XMMs themselves. */
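+   /* These stores are gated by rfbm[1] alone, since the XMM registers
+      belong only to component 1 (SSE). */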
+ UInt reg;
+ for (reg = 0; reg < 16; reg++) {
+ stmt( IRStmt_StoreG(
+ Iend_LE,
+ binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16)),
+ getXMMReg(reg),
+ guard_1
+ ));
+ }
+
+ /* ------ rfbm[2] gates the AVX state ------ */
+ /* Component 2 is just a bunch of register saves, so we'll do it
+ inline, just to be simple and to be Memcheck friendly. */
+
+ IRTemp rfbm_2 = newTemp(Ity_I64);
+ assign(rfbm_2, binop(Iop_And64, mkexpr(rfbm), mkU64(4)));
+
+ IRExpr* guard_2 = binop(Iop_CmpEQ64, mkexpr(rfbm_2), mkU64(4));
+
+ for (reg = 0; reg < 16; reg++) {
+ stmt( IRStmt_StoreG(
+ Iend_LE,
+ binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16)),
+ getYMMRegLane128(reg,1),
+ guard_2
+ ));
+ }
+}
+
+
+static Long dis_XSAVE ( const VexAbiInfo* vbi,
+ Prefix pfx, Long delta, Int sz )
+{
+ /* Note that the presence or absence of REX.W (indicated here by
+ |sz|) slightly affects the written format: whether the saved FPU
+ IP and DP pointers are 64 or 32 bits. But the helper function
+ we call simply writes zero bits in the relevant fields, which
+ are 64 bits regardless of what REX.W is, and so it's good enough
+ (iow, equally broken) in both cases. */
+ IRTemp addr = IRTemp_INVALID;
+ Int alen = 0;
+ HChar dis_buf[50];
+ UChar modrm = getUChar(delta);
+ vassert(!epartIsReg(modrm)); /* ensured by caller */
+ vassert(sz == 4 || sz == 8); /* ditto */
+
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ delta += alen;
+ gen_SEGV_if_not_64_aligned(addr);
+
+ DIP("%sxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);
+
+ /* VEX's caller is assumed to have checked this. */
+ const ULong aSSUMED_XCR0_VALUE = 7;
+
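+   /* The requested-feature bitmap is EDX:EAX of the instruction, masked
+      by the XCR0 value we claim the CPU has. */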
+ IRTemp rfbm = newTemp(Ity_I64);
+ assign(rfbm,
+ binop(Iop_And64,
+ binop(Iop_Or64,
+ binop(Iop_Shl64,
+ unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)),
+ unop(Iop_32Uto64, getIRegRAX(4))),
+ mkU64(aSSUMED_XCR0_VALUE)));
+
+ gen_XSAVE_SEQUENCE(addr, rfbm);
+
+ /* Finally, we need to update XSTATE_BV in the XSAVE header area, by
+ OR-ing the RFBM value into it. */
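+   /* Since rfbm is constrained to the low 3 bits by the masking above,
+      only the low byte of XSTATE_BV needs to be touched. */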
+ IRTemp addr_plus_512 = newTemp(Ity_I64);
+ assign(addr_plus_512, binop(Iop_Add64, mkexpr(addr), mkU64(512)));
+ storeLE( mkexpr(addr_plus_512),
+ binop(Iop_Or8,
+ unop(Iop_64to8, mkexpr(rfbm)),
+ loadLE(Ity_I8, mkexpr(addr_plus_512))) );
+
+ return delta;
+}
+
+
+static Long dis_FXSAVE ( const VexAbiInfo* vbi,
+ Prefix pfx, Long delta, Int sz )
+{
+ /* See comment in dis_XSAVE about the significance of REX.W. */
+ IRTemp addr = IRTemp_INVALID;
+ Int alen = 0;
+ HChar dis_buf[50];
+ UChar modrm = getUChar(delta);
+ vassert(!epartIsReg(modrm)); /* ensured by caller */
+ vassert(sz == 4 || sz == 8); /* ditto */
+
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ delta += alen;
+ gen_SEGV_if_not_16_aligned(addr);
+
+ DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);
+
+ /* FXSAVE is just XSAVE with components 0 and 1 selected. Set rfbm
+ to 0b011, generate the XSAVE sequence accordingly, and let iropt
+      fold out the unused (AVX) parts. */
+ IRTemp rfbm = newTemp(Ity_I64);
+ assign(rfbm, mkU64(3));
+ gen_XSAVE_SEQUENCE(addr, rfbm);
+
+ return delta;
+}
+
+
+static void gen_XRSTOR_SEQUENCE ( IRTemp addr, IRTemp xstate_bv, IRTemp rfbm )
+{
+ /* ------ rfbm[0] gates the x87 state ------ */
+
+ /* If rfbm[0] == 1, we have to write the x87 state. If
+ xstate_bv[0] == 1, we will read it from the memory image, else
+ we'll set it to initial values. Doing this with a helper
+ function and getting the definedness flow annotations correct is
+ too difficult, so generate stupid but simple code: first set the
+ registers to initial values, regardless of xstate_bv[0]. Then,
+ conditionally restore from the memory image. */
+
+ IRTemp rfbm_0 = newTemp(Ity_I64);
+ IRTemp xstate_bv_0 = newTemp(Ity_I64);
+ IRTemp restore_0 = newTemp(Ity_I64);
+ assign(rfbm_0, binop(Iop_And64, mkexpr(rfbm), mkU64(1)));
+ assign(xstate_bv_0, binop(Iop_And64, mkexpr(xstate_bv), mkU64(1)));
+ assign(restore_0, binop(Iop_And64, mkexpr(rfbm_0), mkexpr(xstate_bv_0)));
+
+ gen_FINIT_SEQUENCE( binop(Iop_CmpNE64, mkexpr(rfbm_0), mkU64(0)) );
+
+ /* Uses dirty helper:
+ void amd64g_do_XRSTOR_COMPONENT_0 ( VexGuestAMD64State*, ULong )
+ */
+ IRDirty* d0 = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "amd64g_dirtyhelper_XRSTOR_COMPONENT_0",
+ &amd64g_dirtyhelper_XRSTOR_COMPONENT_0,
+ mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
+ );
+ d0->guard = binop(Iop_CmpNE64, mkexpr(restore_0), mkU64(0));
+
+ /* Declare we're reading memory. Really, bytes 24 through 31
+ (MXCSR and MXCSR_MASK) aren't read, but we can't express more
+ than 1 memory area here, so just mark the whole thing as
+ read. */
+ d0->mFx = Ifx_Read;
+ d0->mAddr = mkexpr(addr);
+ d0->mSize = 160;
+
+ /* declare we're writing guest state */
+ d0->nFxState = 5;
+ vex_bzero(&d0->fxState, sizeof(d0->fxState));
+
+ d0->fxState[0].fx = Ifx_Write;
+ d0->fxState[0].offset = OFFB_FTOP;
+ d0->fxState[0].size = sizeof(UInt);
+
+ d0->fxState[1].fx = Ifx_Write;
+ d0->fxState[1].offset = OFFB_FPREGS;
+ d0->fxState[1].size = 8 * sizeof(ULong);
+
+ d0->fxState[2].fx = Ifx_Write;
+ d0->fxState[2].offset = OFFB_FPTAGS;
+ d0->fxState[2].size = 8 * sizeof(UChar);
+
+ d0->fxState[3].fx = Ifx_Write;
+ d0->fxState[3].offset = OFFB_FPROUND;
+ d0->fxState[3].size = sizeof(ULong);
+
+ d0->fxState[4].fx = Ifx_Write;
+ d0->fxState[4].offset = OFFB_FC3210;
+ d0->fxState[4].size = sizeof(ULong);
+
+ stmt( IRStmt_Dirty(d0) );
+
+ /* ------ rfbm[1] gates the SSE state ------ */
+
+ /* Same scheme as component 0: first zero it out, and then possibly
+ restore from the memory area. */
+ IRTemp rfbm_1 = newTemp(Ity_I64);
+ IRTemp xstate_bv_1 = newTemp(Ity_I64);
+ IRTemp restore_1 = newTemp(Ity_I64);
+ assign(rfbm_1, binop(Iop_And64, mkexpr(rfbm), mkU64(2)));
+ assign(xstate_bv_1, binop(Iop_And64, mkexpr(xstate_bv), mkU64(2)));
+ assign(restore_1, binop(Iop_And64, mkexpr(rfbm_1), mkexpr(xstate_bv_1)));
+ IRExpr* rfbm_1e = binop(Iop_CmpNE64, mkexpr(rfbm_1), mkU64(0));
+ IRExpr* restore_1e = binop(Iop_CmpNE64, mkexpr(restore_1), mkU64(0));
+
+ IRTemp rfbm_1or2 = newTemp(Ity_I64);
+ IRTemp xstate_bv_1or2 = newTemp(Ity_I64);
+ IRTemp restore_1or2 = newTemp(Ity_I64);
+ assign(rfbm_1or2, binop(Iop_And64, mkexpr(rfbm), mkU64(6)));
+ assign(xstate_bv_1or2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(6)));
+ assign(restore_1or2, binop(Iop_And64, mkexpr(rfbm_1or2),
+ mkexpr(xstate_bv_1or2)));
+ IRExpr* rfbm_1or2e = binop(Iop_CmpNE64, mkexpr(rfbm_1or2), mkU64(0));
+ IRExpr* restore_1or2e = binop(Iop_CmpNE64, mkexpr(restore_1or2), mkU64(0));
+
+ /* The areas in question are: SSEROUND, and the XMM register array. */
+ putGuarded(OFFB_SSEROUND, rfbm_1or2e, mkU64(Irrm_NEAREST));
+
+ UInt reg;
+ for (reg = 0; reg < 16; reg++) {
+ putGuarded(xmmGuestRegOffset(reg), rfbm_1e, mkV128(0));
+ }
+
+ /* And now possibly restore from MXCSR/MXCSR_MASK */
+ /* Uses dirty helper:
+ void amd64g_do_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
+ ( VexGuestAMD64State*, ULong )
+ This restores from only MXCSR and MXCSR_MASK. We need to do
+ this if either components 1 (SSE) or 2 (AVX) are requested.
+ Hence the guard condition is a bit more complex.
+ */
+ IRDirty* d1 = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS",
+ &amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS,
+ mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
+ ) ;
+ d1->guard = restore_1or2e;
+
+ /* Declare we're reading memory: MXCSR and MXCSR_MASK. Note that
+      the code for rfbm[0] just above claims a read of 0 .. 159, so
+      this duplicates it.  But at least it correctly connects 24 .. 31 to
+ the MXCSR guest state representation (SSEROUND field). */
+ d1->mFx = Ifx_Read;
+ d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24));
+ d1->mSize = 8;
+
+ /* declare we're writing guest state */
+ d1->nFxState = 1;
+ vex_bzero(&d1->fxState, sizeof(d1->fxState));
+
+ d1->fxState[0].fx = Ifx_Write;
+ d1->fxState[0].offset = OFFB_SSEROUND;
+ d1->fxState[0].size = sizeof(ULong);
+
+ /* Call the helper. This creates SSEROUND but nothing
+ else. We do the actual register array, XMM[0..15], separately,
+ in order that any undefinedness in the XMM registers is tracked
+ separately by Memcheck and is not "infected" by the in-memory
+ shadow for the other parts of the image. */
+ stmt( IRStmt_Dirty(d1) );
+
+ /* And now the XMMs themselves. For each register, we PUT either
+ its old value, or the value loaded from memory. One convenient
+      way to do that is with a conditional load whose default value is
+      the old value of the register. */
+ for (reg = 0; reg < 16; reg++) {
+ IRExpr* ea = binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16));
+ IRExpr* alt = getXMMReg(reg);
+ IRTemp loadedValue = newTemp(Ity_V128);
+ stmt( IRStmt_LoadG(Iend_LE,
+ ILGop_IdentV128,
+ loadedValue, ea, alt, restore_1e) );
+ putXMMReg(reg, mkexpr(loadedValue));
+ }
+
+ /* ------ rfbm[2] gates the AVX state ------ */
+ /* Component 2 is just a bunch of register loads, so we'll do it
+ inline, just to be simple and to be Memcheck friendly. */
+
+ /* Same scheme as component 0: first zero it out, and then possibly
+ restore from the memory area. */
+ IRTemp rfbm_2 = newTemp(Ity_I64);
+ IRTemp xstate_bv_2 = newTemp(Ity_I64);
+ IRTemp restore_2 = newTemp(Ity_I64);
+ assign(rfbm_2, binop(Iop_And64, mkexpr(rfbm), mkU64(4)));
+ assign(xstate_bv_2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(4)));
+ assign(restore_2, binop(Iop_And64, mkexpr(rfbm_2), mkexpr(xstate_bv_2)));
+
+ IRExpr* rfbm_2e = binop(Iop_CmpNE64, mkexpr(rfbm_2), mkU64(0));
+ IRExpr* restore_2e = binop(Iop_CmpNE64, mkexpr(restore_2), mkU64(0));
+
+ for (reg = 0; reg < 16; reg++) {
+ putGuarded(ymmGuestRegLane128offset(reg, 1), rfbm_2e, mkV128(0));
+ }
+
+ for (reg = 0; reg < 16; reg++) {
+ IRExpr* ea = binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16));
+ IRExpr* alt = getYMMRegLane128(reg, 1);
+ IRTemp loadedValue = newTemp(Ity_V128);
+ stmt( IRStmt_LoadG(Iend_LE,
+ ILGop_IdentV128,
+ loadedValue, ea, alt, restore_2e) );
+ putYMMRegLane128(reg, 1, mkexpr(loadedValue));
+ }
+}
+
+
+static Long dis_XRSTOR ( const VexAbiInfo* vbi,
+ Prefix pfx, Long delta, Int sz )
+{
+   /* As with XSAVE above we ignore the value of REX.W since we're
+      not bothering with the FPU DP and IP fields. */
+ IRTemp addr = IRTemp_INVALID;
+ Int alen = 0;
+ HChar dis_buf[50];
+ UChar modrm = getUChar(delta);
+ vassert(!epartIsReg(modrm)); /* ensured by caller */
+ vassert(sz == 4 || sz == 8); /* ditto */
+
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ delta += alen;
+ gen_SEGV_if_not_64_aligned(addr);
+
+ DIP("%sxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);
+
+ /* VEX's caller is assumed to have checked this. */
+ const ULong aSSUMED_XCR0_VALUE = 7;
+
+ IRTemp rfbm = newTemp(Ity_I64);
+ assign(rfbm,
+ binop(Iop_And64,
+ binop(Iop_Or64,
+ binop(Iop_Shl64,
+ unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)),
+ unop(Iop_32Uto64, getIRegRAX(4))),
+ mkU64(aSSUMED_XCR0_VALUE)));
+
+ IRTemp xstate_bv = newTemp(Ity_I64);
+ assign(xstate_bv, loadLE(Ity_I64,
+ binop(Iop_Add64, mkexpr(addr), mkU64(512+0))));
+
+ IRTemp xcomp_bv = newTemp(Ity_I64);
+ assign(xcomp_bv, loadLE(Ity_I64,
+ binop(Iop_Add64, mkexpr(addr), mkU64(512+8))));
+
+ IRTemp xsavehdr_23_16 = newTemp(Ity_I64);
+ assign( xsavehdr_23_16,
+ loadLE(Ity_I64,
+ binop(Iop_Add64, mkexpr(addr), mkU64(512+16))));
+
+ /* We must fault if
+ * xcomp_bv[63] == 1, since this simulated CPU does not support
+ the compaction extension.
+ * xstate_bv sets a bit outside of XCR0 (which we assume to be 7).
+ * any of the xsave header bytes 23 .. 8 are nonzero. This seems to
+ imply that xcomp_bv must be zero.
+ xcomp_bv is header bytes 15 .. 8 and xstate_bv is header bytes 7 .. 0
+ */
+ IRTemp fault_if_nonzero = newTemp(Ity_I64);
+ assign(fault_if_nonzero,
+ binop(Iop_Or64,
+ binop(Iop_And64, mkexpr(xstate_bv), mkU64(~aSSUMED_XCR0_VALUE)),
+ binop(Iop_Or64, mkexpr(xcomp_bv), mkexpr(xsavehdr_23_16))));
+ stmt( IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(fault_if_nonzero), mkU64(0)),
+ Ijk_SigSEGV,
+ IRConst_U64(guest_RIP_curr_instr),
+ OFFB_RIP
+ ));
+
+ /* We are guaranteed now that both xstate_bv and rfbm are in the
+ range 0 .. 7. Generate the restore sequence proper. */
+ gen_XRSTOR_SEQUENCE(addr, xstate_bv, rfbm);
+
+ return delta;
+}
+
+
+static Long dis_FXRSTOR ( const VexAbiInfo* vbi,
+ Prefix pfx, Long delta, Int sz )
+{
+ /* As with FXSAVE above we ignore the value of REX.W since we're
+ not bothering with the FPU DP and IP fields. */
+ IRTemp addr = IRTemp_INVALID;
+ Int alen = 0;
+ HChar dis_buf[50];
+ UChar modrm = getUChar(delta);
+ vassert(!epartIsReg(modrm)); /* ensured by caller */
+ vassert(sz == 4 || sz == 8); /* ditto */
+
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ delta += alen;
+ gen_SEGV_if_not_16_aligned(addr);
+
+ DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);
+
+ /* FXRSTOR is just XRSTOR with components 0 and 1 selected and also
+ as if components 0 and 1 are set as present in XSTATE_BV in the
+ XSAVE header. Set both rfbm and xstate_bv to 0b011 therefore,
+ generate the XRSTOR sequence accordingly, and let iropt fold out
+      the unused (AVX) parts. */
+ IRTemp three = newTemp(Ity_I64);
+ assign(three, mkU64(3));
+ gen_XRSTOR_SEQUENCE(addr, three/*xstate_bv*/, three/*rfbm*/);
+
+ return delta;
+}
+
+
static IRTemp math_PINSRW_128 ( IRTemp v128, IRTemp u16, UInt imm8 )
{
vassert(imm8 >= 0 && imm8 <= 7);
__attribute__((noinline))
static
Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
+ const VexArchInfo* archinfo,
const VexAbiInfo* vbi,
Prefix pfx, Int sz, Long deltaIN,
DisResult* dres )
delta = dis_LDMXCSR(vbi, pfx, delta, False/*!isAvx*/);
goto decode_success;
}
- /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory.
- Note that the presence or absence of REX.W slightly affects the
- written format: whether the saved FPU IP and DP pointers are 64
- or 32 bits. But the helper function we call simply writes zero
- bits in the relevant fields (which are 64 bits regardless of
- what REX.W is) and so it's good enough (iow, equally broken) in
- both cases. */
+ /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */
if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
&& !epartIsReg(getUChar(delta))
&& gregOfRexRM(pfx,getUChar(delta)) == 0) {
- IRDirty* d;
- modrm = getUChar(delta);
- vassert(!epartIsReg(modrm));
-
- addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
- delta += alen;
- gen_SEGV_if_not_16_aligned(addr);
-
- DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);
-
- /* Uses dirty helper:
- void amd64g_do_FXSAVE_ALL_EXCEPT_XMM ( VexGuestAMD64State*,
- ULong ) */
- d = unsafeIRDirty_0_N (
- 0/*regparms*/,
- "amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM",
- &amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM,
- mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
- );
-
- /* declare we're writing memory */
- d->mFx = Ifx_Write;
- d->mAddr = mkexpr(addr);
- d->mSize = 464; /* according to recent Intel docs */
-
- /* declare we're reading guest state */
- d->nFxState = 6;
- vex_bzero(&d->fxState, sizeof(d->fxState));
-
- d->fxState[0].fx = Ifx_Read;
- d->fxState[0].offset = OFFB_FTOP;
- d->fxState[0].size = sizeof(UInt);
-
- d->fxState[1].fx = Ifx_Read;
- d->fxState[1].offset = OFFB_FPREGS;
- d->fxState[1].size = 8 * sizeof(ULong);
-
- d->fxState[2].fx = Ifx_Read;
- d->fxState[2].offset = OFFB_FPTAGS;
- d->fxState[2].size = 8 * sizeof(UChar);
-
- d->fxState[3].fx = Ifx_Read;
- d->fxState[3].offset = OFFB_FPROUND;
- d->fxState[3].size = sizeof(ULong);
-
- d->fxState[4].fx = Ifx_Read;
- d->fxState[4].offset = OFFB_FC3210;
- d->fxState[4].size = sizeof(ULong);
-
- d->fxState[5].fx = Ifx_Read;
- d->fxState[5].offset = OFFB_SSEROUND;
- d->fxState[5].size = sizeof(ULong);
-
- /* Call the helper. This creates all parts of the in-memory
- image except for the XMM[0..15] array, which we do
- separately, in order that any undefinedness in the XMM
- registers is tracked separately by Memcheck and does not
- "infect" the in-memory shadow for the other parts of the
- image (FPTOP, FPREGS, FPTAGS, FPROUND, FC3210,
- SSEROUND). */
- stmt( IRStmt_Dirty(d) );
-
- /* And now the XMMs themselves. */
- UInt xmm;
- for (xmm = 0; xmm < 16; xmm++) {
- storeLE( binop(Iop_Add64, mkexpr(addr), mkU64(160 + xmm * 16)),
- getXMMReg(xmm) );
- }
-
+ delta = dis_FXSAVE(vbi, pfx, delta, sz);
goto decode_success;
}
- /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory.
- As with FXSAVE above we ignore the value of REX.W since we're
- not bothering with the FPU DP and IP fields. */
+ /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */
if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
&& !epartIsReg(getUChar(delta))
&& gregOfRexRM(pfx,getUChar(delta)) == 1) {
- IRDirty* d;
- modrm = getUChar(delta);
- vassert(!epartIsReg(modrm));
-
- addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
- delta += alen;
- gen_SEGV_if_not_16_aligned(addr);
-
- DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);
-
- /* Uses dirty helper:
- VexEmNote amd64g_do_FXRSTOR_ALL_EXCEPT_XMM ( VexGuestAMD64State*,
- ULong )
- NOTE:
- the VexEmNote value is simply ignored
- */
- d = unsafeIRDirty_0_N (
- 0/*regparms*/,
- "amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM",
- &amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM,
- mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
- );
-
- /* declare we're reading memory */
- d->mFx = Ifx_Read;
- d->mAddr = mkexpr(addr);
- d->mSize = 464; /* according to recent Intel docs */
-
- /* declare we're writing guest state */
- d->nFxState = 6;
- vex_bzero(&d->fxState, sizeof(d->fxState));
-
- d->fxState[0].fx = Ifx_Write;
- d->fxState[0].offset = OFFB_FTOP;
- d->fxState[0].size = sizeof(UInt);
-
- d->fxState[1].fx = Ifx_Write;
- d->fxState[1].offset = OFFB_FPREGS;
- d->fxState[1].size = 8 * sizeof(ULong);
-
- d->fxState[2].fx = Ifx_Write;
- d->fxState[2].offset = OFFB_FPTAGS;
- d->fxState[2].size = 8 * sizeof(UChar);
-
- d->fxState[3].fx = Ifx_Write;
- d->fxState[3].offset = OFFB_FPROUND;
- d->fxState[3].size = sizeof(ULong);
-
- d->fxState[4].fx = Ifx_Write;
- d->fxState[4].offset = OFFB_FC3210;
- d->fxState[4].size = sizeof(ULong);
-
- d->fxState[5].fx = Ifx_Write;
- d->fxState[5].offset = OFFB_SSEROUND;
- d->fxState[5].size = sizeof(ULong);
-
- /* Call the helper. This reads all parts of the in-memory
- image except for the XMM[0..15] array, which we do
- separately, in order that any undefinedness in the XMM
- registers is tracked separately by Memcheck and does not
- "infect" the in-guest-state shadow for the other parts of the
- image (FPTOP, FPREGS, FPTAGS, FPROUND, FC3210,
- SSEROUND). */
- stmt( IRStmt_Dirty(d) );
-
- /* And now the XMMs themselves. */
- UInt xmm;
- for (xmm = 0; xmm < 16; xmm++) {
- putXMMReg(xmm, loadLE(Ity_V128,
- binop(Iop_Add64, mkexpr(addr),
- mkU64(160 + xmm * 16))));
- }
-
+ delta = dis_FXRSTOR(vbi, pfx, delta, sz);
+ goto decode_success;
+ }
+ /* 0F AE /4 = XSAVE mem -- write x87, SSE, AVX state to memory */
+ if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
+ && !epartIsReg(getUChar(delta))
+ && gregOfRexRM(pfx,getUChar(delta)) == 4
+ && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
+ delta = dis_XSAVE(vbi, pfx, delta, sz);
+ goto decode_success;
+ }
+ /* 0F AE /5 = XRSTOR mem -- read x87, SSE, AVX state from memory */
+ if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
+ && !epartIsReg(getUChar(delta))
+ && gregOfRexRM(pfx,getUChar(delta)) == 5
+ && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
+ delta = dis_XRSTOR(vbi, pfx, delta, sz);
goto decode_success;
}
break;
const HChar* fName = NULL;
void* fAddr = NULL;
- /* JRS 2014-11-11: this a really horrible temp kludge to work
- around the fact that the Yosemite (OSX 10.10)
- /usr/lib/system/libdyld.dylib expects XSAVE/XRSTOR to be
- implemented, because amd64g_dirtyhelper_CPUID_avx_and_cx16
- claims they are supported, but so far they aren't. So cause
- it to fall back to a simpler CPU. The cleaner approach of
- setting CPUID(eax=1).OSXSAVE=0 and .XSAVE=0 isn't desirable
- since it will (per the official Intel guidelines) lead to
- software concluding that AVX isn't supported.
-
- This is also a kludge in that putting these ifdefs here checks
- the build (host) architecture, when really we're checking the
- guest architecture. */
- Bool this_is_yosemite = False;
-# if defined(VGP_amd64_darwin) && DARWIN_VERS == DARWIN_10_10
- this_is_yosemite = True;
-# endif
-
if (haveF2orF3(pfx)) goto decode_failure;
+
/* This isn't entirely correct, CPUID should depend on the VEX
capabilities, not on the underlying CPU. See bug #324882. */
- if (!this_is_yosemite &&
- (archinfo->hwcaps & VEX_HWCAPS_AMD64_SSE3) &&
+ if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSE3) &&
(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) &&
- (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
+ (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX2)) {
+ fName = "amd64g_dirtyhelper_CPUID_avx2";
+ fAddr = &amd64g_dirtyhelper_CPUID_avx2;
+ /* This is a Core-i7-4910-like machine */
+ }
+ else if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSE3) &&
+ (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) &&
+ (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
fName = "amd64g_dirtyhelper_CPUID_avx_and_cx16";
fAddr = &amd64g_dirtyhelper_CPUID_avx_and_cx16;
/* This is a Core-i5-2300-like machine */
facility in 64 bit mode. */
{
Bool decode_OK = False;
- delta = dis_ESC_0F__SSE2 ( &decode_OK, vbi, pfx, sz, deltaIN, dres );
+ delta = dis_ESC_0F__SSE2 ( &decode_OK,
+ archinfo, vbi, pfx, sz, deltaIN, dres );
if (decode_OK)
return delta;
}
vassert(sz == 4 || sz == 8 || sz == 16);
return i;
}
+AMD64Instr* AMD64Instr_SseCStore ( AMD64CondCode cond,
+ HReg src, AMD64AMode* addr )
+{
+ AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
+ i->tag = Ain_SseCStore;
+ i->Ain.SseCStore.cond = cond;
+ i->Ain.SseCStore.src = src;
+ i->Ain.SseCStore.addr = addr;
+ vassert(cond != Acc_ALWAYS);
+ return i;
+}
+AMD64Instr* AMD64Instr_SseCLoad ( AMD64CondCode cond,
+ AMD64AMode* addr, HReg dst )
+{
+ AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
+ i->tag = Ain_SseCLoad;
+ i->Ain.SseCLoad.cond = cond;
+ i->Ain.SseCLoad.addr = addr;
+ i->Ain.SseCLoad.dst = dst;
+ vassert(cond != Acc_ALWAYS);
+ return i;
+}
AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg reg, AMD64AMode* addr )
{
AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
ppAMD64AMode(i->Ain.SseLdSt.addr);
}
return;
+ case Ain_SseCStore:
+ vex_printf("if (%%rflags.%s) { ",
+ showAMD64CondCode(i->Ain.SseCStore.cond));
+ vex_printf("movups ");
+ ppHRegAMD64(i->Ain.SseCStore.src);
+ vex_printf(", ");
+ ppAMD64AMode(i->Ain.SseCStore.addr);
+ vex_printf(" }");
+ return;
+ case Ain_SseCLoad:
+ vex_printf("if (%%rflags.%s) { ",
+ showAMD64CondCode(i->Ain.SseCLoad.cond));
+ vex_printf("movups ");
+ ppAMD64AMode(i->Ain.SseCLoad.addr);
+ vex_printf(", ");
+ ppHRegAMD64(i->Ain.SseCLoad.dst);
+ vex_printf(" }");
+ return;
case Ain_SseLdzLO:
vex_printf("movs%s ", i->Ain.SseLdzLO.sz==4 ? "s" : "d");
ppAMD64AMode(i->Ain.SseLdzLO.addr);
addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead,
i->Ain.SseLdSt.reg);
return;
+ case Ain_SseCStore:
+ addRegUsage_AMD64AMode(u, i->Ain.SseCStore.addr);
+ addHRegUse(u, HRmRead, i->Ain.SseCStore.src);
+ return;
+ case Ain_SseCLoad:
+ addRegUsage_AMD64AMode(u, i->Ain.SseCLoad.addr);
+ addHRegUse(u, HRmModify, i->Ain.SseCLoad.dst);
+ return;
case Ain_SseLdzLO:
addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr);
addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg);
mapReg(m, &i->Ain.SseLdSt.reg);
mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr);
break;
+ case Ain_SseCStore:
+ mapRegs_AMD64AMode(m, i->Ain.SseCStore.addr);
+ mapReg(m, &i->Ain.SseCStore.src);
+ return;
+ case Ain_SseCLoad:
+ mapRegs_AMD64AMode(m, i->Ain.SseCLoad.addr);
+ mapReg(m, &i->Ain.SseCLoad.dst);
+ return;
case Ain_SseLdzLO:
mapReg(m, &i->Ain.SseLdzLO.reg);
mapRegs_AMD64AMode(m, i->Ain.SseLdzLO.addr);
UChar* p = &buf[0];
UChar* ptmp;
Int j;
- vassert(nbuf >= 32);
+ vassert(nbuf >= 64);
vassert(mode64 == True);
/* vex_printf("asm "); ppAMD64Instr(i, mode64); vex_printf("\n"); */
*p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
break;
case RLPri_2Int:
- vassert(0); //ATC
+ goto bad; //ATC
// movabsq $0x5555555555555555, %rax
*p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
// movq %rax, %rdx
*p++ = 0x48; *p++ = 0x89; *p++ = 0xC2;
+ break;
+ case RLPri_V128SpRel:
+ if (i->Ain.Call.rloc.spOff == 0) {
+ // We could accept any |spOff| here, but that's more
+ // hassle and the only value we're ever going to get
+ // is zero (I believe.) Hence take the easy path :)
+               // We need a scratch register -- r11 can be it.
+ // movabsq $0x5555555555555555, %r11
+ *p++ = 0x49; *p++ = 0xBB;
+ p = emit64(p, 0x5555555555555555ULL);
+ // movq %r11, 0(%rsp)
+ *p++ = 0x4C; *p++ = 0x89; *p++ = 0x1C; *p++ = 0x24;
+ // movq %r11, 8(%rsp)
+ *p++ = 0x4C; *p++ = 0x89; *p++ = 0x5C; *p++ = 0x24;
+ *p++ = 0x08;
+ break;
+ }
+ goto bad; //ATC for all other spOff values
+ case RLPri_V256SpRel:
+ goto bad; //ATC
case RLPri_None: case RLPri_INVALID: default:
- vassert(0);
+ vassert(0); // should never get here
}
// after:
}
case Ain_CStore: {
- /* AFAICS this is identical to Ain_CStore except that the opcode
+ /* AFAICS this is identical to Ain_CLoad except that the opcode
is 0x89 instead of 0x8B. */
vassert(i->Ain.CStore.cond != Acc_ALWAYS);
i->Ain.SseLdSt.addr);
goto done;
+ case Ain_SseCStore: {
+ vassert(i->Ain.SseCStore.cond != Acc_ALWAYS);
+
+ /* Use ptmp for backpatching conditional jumps. */
+ ptmp = NULL;
+
+ /* jmp fwds if !condition */
+ *p++ = toUChar(0x70 + (0xF & (i->Ain.SseCStore.cond ^ 1)));
+ ptmp = p; /* fill in this bit later */
+ *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+
+ /* Now the store. */
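+      /* movups %xmm_src, amode  (opcode 0F 11, no alignment needed) */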
+ *p++ = clearWBit(
+ rexAMode_M_enc(vregEnc3210(i->Ain.SseCStore.src),
+ i->Ain.SseCStore.addr));
+ *p++ = 0x0F;
+ *p++ = toUChar(0x11);
+ p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseCStore.src),
+ i->Ain.SseCStore.addr);
+
+ /* Fix up the conditional branch */
+ Int delta = p - ptmp;
+ vassert(delta > 0 && delta < 40);
+ *ptmp = toUChar(delta-1);
+ goto done;
+ }
+
+ case Ain_SseCLoad: {
+ vassert(i->Ain.SseCLoad.cond != Acc_ALWAYS);
+
+ /* Use ptmp for backpatching conditional jumps. */
+ ptmp = NULL;
+
+ /* jmp fwds if !condition */
+ *p++ = toUChar(0x70 + (0xF & (i->Ain.SseCLoad.cond ^ 1)));
+ ptmp = p; /* fill in this bit later */
+ *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+
+ /* Now the load. */
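+      /* movups amode, %xmm_dst  (opcode 0F 10, no alignment needed) */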
+ *p++ = clearWBit(
+ rexAMode_M_enc(vregEnc3210(i->Ain.SseCLoad.dst),
+ i->Ain.SseCLoad.addr));
+ *p++ = 0x0F;
+ *p++ = toUChar(0x10);
+ p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseCLoad.dst),
+ i->Ain.SseCLoad.addr);
+
+ /* Fix up the conditional branch */
+ Int delta = p - ptmp;
+ vassert(delta > 0 && delta < 40);
+ *ptmp = toUChar(delta-1);
+ goto done;
+ }
+
case Ain_SseLdzLO:
vassert(i->Ain.SseLdzLO.sz == 4 || i->Ain.SseLdzLO.sz == 8);
/* movs[sd] amode, %xmm-dst */
/*NOTREACHED*/
done:
- vassert(p - &buf[0] <= 32);
+ vassert(p - &buf[0] <= 64);
return p - &buf[0];
}
Ain_SseSDSS, /* scalar float32 to/from float64 */
Ain_SseLdSt, /* SSE load/store 32/64/128 bits, no alignment
constraints, upper 96/64/0 bits arbitrary */
+ Ain_SseCStore, /* SSE conditional store, 128 bit only, any alignment */
+ Ain_SseCLoad, /* SSE conditional load, 128 bit only, any alignment */
Ain_SseLdzLO, /* SSE load low 32/64 bits, zero remainder of reg */
Ain_Sse32Fx4, /* SSE binary, 32Fx4 */
Ain_Sse32FLo, /* SSE binary, 32F in lowest lane only */
HReg reg;
AMD64AMode* addr;
} SseLdSt;
+ struct {
+ AMD64CondCode cond; /* may not be Acc_ALWAYS */
+ HReg src;
+ AMD64AMode* addr;
+ } SseCStore;
+ struct {
+ AMD64CondCode cond; /* may not be Acc_ALWAYS */
+ AMD64AMode* addr;
+ HReg dst;
+ } SseCLoad;
struct {
Int sz; /* 4 or 8 only */
HReg reg;
extern AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst );
extern AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst );
extern AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz, HReg, AMD64AMode* );
+extern AMD64Instr* AMD64Instr_SseCStore ( AMD64CondCode, HReg, AMD64AMode* );
+extern AMD64Instr* AMD64Instr_SseCLoad ( AMD64CondCode, AMD64AMode*, HReg );
extern AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg, AMD64AMode* );
extern AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp, HReg, HReg );
extern AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp, HReg, HReg );
UChar szB = 0; /* invalid */
switch (lg->cvt) {
- case ILGop_Ident32: szB = 4; break;
- case ILGop_Ident64: szB = 8; break;
+ case ILGop_Ident32: szB = 4; break;
+ case ILGop_Ident64: szB = 8; break;
+ case ILGop_IdentV128: szB = 16; break;
default: break;
}
if (szB == 0)
goto stmt_fail;
- AMD64AMode* amAddr = iselIntExpr_AMode(env, lg->addr);
- HReg rAlt = iselIntExpr_R(env, lg->alt);
- HReg rDst = lookupIRTemp(env, lg->dst);
+ AMD64AMode* amAddr
+ = iselIntExpr_AMode(env, lg->addr);
+ HReg rAlt
+ = szB == 16 ? iselVecExpr(env, lg->alt)
+ : iselIntExpr_R(env, lg->alt);
+ HReg rDst
+ = lookupIRTemp(env, lg->dst);
+
/* Get the alt value into the dst. We'll do a conditional load
which overwrites it -- or not -- with loaded data. */
- addInstr(env, mk_iMOVsd_RR(rAlt, rDst));
+ if (szB == 16) {
+ addInstr(env, mk_vMOVsd_RR(rAlt, rDst));
+ } else {
+ addInstr(env, mk_iMOVsd_RR(rAlt, rDst));
+ }
AMD64CondCode cc = iselCondCode(env, lg->guard);
- addInstr(env, AMD64Instr_CLoad(cc, szB, amAddr, rDst));
+ if (szB == 16) {
+ addInstr(env, AMD64Instr_SseCLoad(cc, amAddr, rDst));
+ } else {
+ addInstr(env, AMD64Instr_CLoad(cc, szB, amAddr, rDst));
+ }
return;
}
UChar szB = 0; /* invalid */
switch (typeOfIRExpr(env->type_env, sg->data)) {
- case Ity_I32: szB = 4; break;
- case Ity_I64: szB = 8; break;
+ case Ity_I32: szB = 4; break;
+ case Ity_I64: szB = 8; break;
+ case Ity_V128: szB = 16; break;
default: break;
}
if (szB == 0)
goto stmt_fail;
- AMD64AMode* amAddr = iselIntExpr_AMode(env, sg->addr);
- HReg rSrc = iselIntExpr_R(env, sg->data);
- AMD64CondCode cc = iselCondCode(env, sg->guard);
- addInstr(env, AMD64Instr_CStore(cc, szB, rSrc, amAddr));
+ AMD64AMode* amAddr
+ = iselIntExpr_AMode(env, sg->addr);
+ HReg rSrc
+ = szB == 16 ? iselVecExpr(env, sg->data)
+ : iselIntExpr_R(env, sg->data);
+ AMD64CondCode cc
+ = iselCondCode(env, sg->guard);
+ if (szB == 16) {
+ addInstr(env, AMD64Instr_SseCStore(cc, rSrc, amAddr));
+ } else {
+ addInstr(env, AMD64Instr_CStore(cc, szB, rSrc, amAddr));
+ }
return;
}
void ppIRLoadGOp ( IRLoadGOp cvt )
{
switch (cvt) {
- case ILGop_INVALID: vex_printf("ILGop_INVALID"); break;
- case ILGop_Ident64: vex_printf("Ident64"); break;
- case ILGop_Ident32: vex_printf("Ident32"); break;
- case ILGop_16Uto32: vex_printf("16Uto32"); break;
- case ILGop_16Sto32: vex_printf("16Sto32"); break;
- case ILGop_8Uto32: vex_printf("8Uto32"); break;
- case ILGop_8Sto32: vex_printf("8Sto32"); break;
+ case ILGop_INVALID: vex_printf("ILGop_INVALID"); break;
+ case ILGop_IdentV128: vex_printf("IdentV128"); break;
+ case ILGop_Ident64: vex_printf("Ident64"); break;
+ case ILGop_Ident32: vex_printf("Ident32"); break;
+ case ILGop_16Uto32: vex_printf("16Uto32"); break;
+ case ILGop_16Sto32: vex_printf("16Sto32"); break;
+ case ILGop_8Uto32: vex_printf("8Uto32"); break;
+ case ILGop_8Sto32: vex_printf("8Sto32"); break;
default: vpanic("ppIRLoadGOp");
}
}
/*OUT*/IRType* t_res, /*OUT*/IRType* t_arg )
{
switch (cvt) {
+ case ILGop_IdentV128:
+ *t_res = Ity_V128; *t_arg = Ity_V128; break;
case ILGop_Ident64:
*t_res = Ity_I64; *t_arg = Ity_I64; break;
case ILGop_Ident32:
case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
case Iop_XorV128:
case Iop_AndV128: return IRExpr_Const(IRConst_V128(0));
+ case Iop_XorV256:
case Iop_AndV256: return IRExpr_Const(IRConst_V256(0));
default: vpanic("mkZeroOfPrimopResultType: bad primop");
}
case Iop_Xor32:
case Iop_Xor64:
case Iop_XorV128:
+ case Iop_XorV256:
/* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */
if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
typeOfIRLoadGOp(lg->cvt, &cvtRes, &cvtArg);
IROp cvtOp = Iop_INVALID;
switch (lg->cvt) {
+ case ILGop_IdentV128:
+ case ILGop_Ident64:
case ILGop_Ident32: break;
case ILGop_8Uto32: cvtOp = Iop_8Uto32; break;
case ILGop_8Sto32: cvtOp = Iop_8Sto32; break;
typedef
enum {
ILGop_INVALID=0x1D00,
+ ILGop_IdentV128, /* 128 bit vector, no conversion */
ILGop_Ident64, /* 64 bit, no conversion */
ILGop_Ident32, /* 32 bit, no conversion */
ILGop_16Uto32, /* 16 bit load, Z-widen to 32 */