From: Oliver Kurth
Date: Wed, 8 May 2019 22:27:19 +0000 (-0700)
Subject: Common header file change not applicable to open-vm-tools.
X-Git-Tag: stable-11.0.0~89
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2031724154177a3f5ce28d3c608709c7ad6153dc;p=thirdparty%2Fopen-vm-tools.git

Common header file change not applicable to open-vm-tools.
---

diff --git a/open-vm-tools/lib/include/vm_basic_asm_x86_common.h b/open-vm-tools/lib/include/vm_basic_asm_x86_common.h
index 5b3f4b65b..33235a664 100644
--- a/open-vm-tools/lib/include/vm_basic_asm_x86_common.h
+++ b/open-vm-tools/lib/include/vm_basic_asm_x86_common.h
@@ -262,53 +262,28 @@ GetCallerEFlags(void)
    __GET_EAX_FROM_CPUID(1), \
    X86MSR_GetMSR(MSR_BIOS_SIGN_ID))
+
 /*
  *-----------------------------------------------------------------------------
  *
- * RDTSC_BARRIER --
- *
- *      Implements an RDTSC fence. Instructions executed prior to the
- *      fence will have completed before the fence and all stores to
- *      memory are flushed from the store buffer.
+ * MFENCE --
  *
- *      On AMD, MFENCE is sufficient. On Intel, only LFENCE is
- *      documented to fence RDTSC, but LFENCE won't drain the store
- *      buffer. So, use MFENCE;LFENCE, which will work on both AMD and
- *      Intel.
+ *      Wrapper around the MFENCE instruction.
 *
- *      It is the callers' responsibility to check for SSE2 before
- *      calling this function.
+ *      Caveat Emptor! This function is _NOT_ _PORTABLE_ and most certainly
+ *      not something you should use. Take a look at the SMP_*_BARRIER_*,
+ *      DMA_*_BARRIER_* and MMIO_*_BARRIER_* interfaces instead, when writing
+ *      general OS/VMM code.
 *
 * Results:
 *      None.
 *
 * Side effects:
- *      Cause loads and stores prior to this to be globally visible, and
- *      RDTSC will not pass.
+ *      See MFENCE instruction in Intel SDM or AMD Programmer's Manual.
 *
 *-----------------------------------------------------------------------------
 */
 
-static INLINE void
-RDTSC_BARRIER(void)
-{
-#ifdef __GNUC__
-   __asm__ __volatile__(
-      "mfence \n\t"
-      "lfence \n\t"
-      ::: "memory"
-   );
-#elif defined _MSC_VER
-   /* Prevent compiler from moving code across mfence/lfence. */
-   _ReadWriteBarrier();
-   _mm_mfence();
-   _mm_lfence();
-   _ReadWriteBarrier();
-#else
-#error No compiler defined for RDTSC_BARRIER
-#endif
-}
-
 static INLINE void
 MFENCE(void)
 {
@@ -327,6 +302,27 @@ MFENCE(void)
 }
 
 
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * LFENCE --
+ *
+ *      Wrapper around the LFENCE instruction.
+ *
+ *      Caveat Emptor! This function is _NOT_ _PORTABLE_ and most certainly
+ *      not something you should use. Take a look at the SMP_*_BARRIER_*,
+ *      DMA_*_BARRIER_* and MMIO_*_BARRIER_* interfaces instead, when writing
+ *      general OS/VMM code.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      See LFENCE instruction in Intel SDM or AMD Programmer's Manual.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
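The bodies of these fence wrappers are unchanged by this patch and appear only
as elided context below. For reference, they follow the same compiler-dispatch
pattern as the RDTSC_BARRIER implementation removed above; a minimal sketch of
such a wrapper (illustrative only, not the verbatim upstream body):

   static INLINE void
   LFENCE(void)
   {
   #ifdef __GNUC__
      __asm__ __volatile__("lfence" ::: "memory");
   #elif defined _MSC_VER
      _ReadWriteBarrier();   /* Compiler barrier: no code motion across the fence. */
      _mm_lfence();          /* SSE2 intrinsic that emits LFENCE. */
      _ReadWriteBarrier();
   #else
   #error No compiler defined for LFENCE
   #endif
   }
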
 static INLINE void
 LFENCE(void)
 {
@@ -345,6 +341,27 @@ LFENCE(void)
 }
 
 
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * SFENCE --
+ *
+ *      Wrapper around the SFENCE instruction.
+ *
+ *      Caveat Emptor! This function is _NOT_ _PORTABLE_ and most certainly
+ *      not something you should use. Take a look at the SMP_*_BARRIER_*,
+ *      DMA_*_BARRIER_* and MMIO_*_BARRIER_* interfaces instead, when writing
+ *      general OS/VMM code.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      See SFENCE instruction in Intel SDM or AMD Programmer's Manual.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
 static INLINE void
 SFENCE(void)
 {
@@ -367,6 +384,93 @@ SFENCE(void)
 }
 
 
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * RDTSC_BARRIER --
+ *
+ *      Implements an RDTSC fence. Instructions executed prior to the
+ *      fence will have completed before the fence and all stores to
+ *      memory are flushed from the store buffer.
+ *
+ *      On AMD, MFENCE is sufficient. On Intel, only LFENCE is
+ *      documented to fence RDTSC, but LFENCE won't drain the store
+ *      buffer. So, use MFENCE;LFENCE, which will work on both AMD and
+ *      Intel.
+ *
+ *      It is the caller's responsibility to check for SSE2 before
+ *      calling this function.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      Causes loads and stores prior to this to be globally visible, and
+ *      RDTSC will not pass.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+static INLINE void
+RDTSC_BARRIER(void)
+{
+   MFENCE();
+   LFENCE();
+}
+
+
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * LOCKED_INSN_BARRIER --
+ *
+ *      Implements a full WB load/store barrier using a locked instruction.
+ *
+ *      See PR 1674199 for details. You may choose to use this for
+ *      performance reasons over MFENCE iff you are only dealing with
+ *      WB memory accesses.
+ *
+ *      DANGER! Do not use this barrier instead of MFENCE when dealing
+ *      with non-temporal instructions or UC/WC memory accesses.
+ *
+ *      Caveat Emptor! This function is _NOT_ _PORTABLE_ and most certainly
+ *      not something you should use. Take a look at the SMP_*_BARRIER_*,
+ *      DMA_*_BARRIER_* and MMIO_*_BARRIER_* interfaces instead, when writing
+ *      general OS/VMM code.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      Causes WB loads and stores before the call to be globally visible
+ *      before WB loads and stores after this call.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+static INLINE void
+LOCKED_INSN_BARRIER(void)
+{
+   volatile long temp = 0;
+
+#if defined __GNUC__
+   __asm__ __volatile__ (
+      "lock xorl $1, %0"
+      : "+m" (temp)
+      : /* no additional inputs */
+      : "cc", "memory");
+#elif defined _MSC_VER
+   /*
+    * Ignore warning about _InterlockedXor operation on a local variable; we are
+    * using the operation for its side-effects only.
+    */
+   #pragma warning(suppress:28113)
+   _InterlockedXor(&temp, 1);
+#else
+#error LOCKED_INSN_BARRIER not defined for this compiler
+#endif
+}
+
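A locked read-modify-write is a full barrier for ordinary write-back memory on
x86, and it is typically cheaper than MFENCE, which must additionally order
non-temporal and WC accesses; that is the performance trade PR 1674199 refers
to. A hypothetical caller (names are illustrative, not from this header)
showing the store-load case this barrier exists for:

   static volatile int flagA, flagB;

   static INLINE int
   TryEnterA(void)
   {
      flagA = 1;               /* Announce intent... */
      LOCKED_INSN_BARRIER();   /* ...and keep this store from passing the load below. */
      return flagB == 0;       /* Without the barrier, both sides may observe 0. */
   }
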
 /*
 * Memory Barriers
 * ===============
@@ -425,7 +529,6 @@ SFENCE(void)
 # define COMPILER_MEM_BARRIER() _ReadWriteBarrier()
 #endif
 
-
 /*
 * Memory barriers. These take the form of
 *
@@ -444,52 +547,73 @@
 *
 * Thanks for pasting this whole comment into every architecture header.
 *
- * On x86, we only need to care specifically about store-load reordering on
- * normal memory types. In other cases, only a compiler barrier is needed.
- * SMP_W_BARRIER_R is implemented with a locked xor operation (instead of the
- * mfence instruction) for performance reasons. See PR 1674199 for more
- * details.
- *
- * On x64, special instructions are only provided for load-load (lfence) and
- * store-store (sfence) ordering, and they don't apply to normal memory.
+ * This is a simplified version of Table 7-3 (Memory Access Ordering Rules) from
+ * AMD's AMD64 Architecture Programmer's Manual Volume 2: System Programming
+ * (September 2018, Publication 24593, Revision 3.30).
+ *
+ * https://www.amd.com/system/files/TechDocs/24593.pdf#page=228
+ *
+ * This table only includes the memory types we care about in the context of
+ * SMP, DMA and MMIO barriers.
+ *
+ * +-------------+------+------+------+------+------+------+
+ * |\ 2nd mem op |      |      |      |      |      |      |
+ * | `---------. | R WB | R UC | R WC | W WB | W UC | W WC |
+ * | 1st mem op \|      |      |      |      |      |      |
+ * +-------------+------+------+------+------+------+------+
+ * | R WB        |      |      | LF1  |      |      |      |
+ * +-------------+------+------+------+------+------+------+
+ * | R UC        |      |      | LF1  |      |      |      |
+ * +-------------+------+------+------+------+------+------+
+ * | R WC        |      |      | LF1  |      |      |      |
+ * +-------------+------+------+------+------+------+------+
+ * | W WB        | MF1  |      | MF1  |      |      | SF2  |
+ * +-------------+------+------+------+------+------+------+
+ * | W UC        | MF1  |      | MF1  |      |      | SF2  |
+ * +-------------+------+------+------+------+------+------+
+ * | W WC        | MF1  |      | MF1  | SF1  |      | SF2  |
+ * +-------------+------+------+------+------+------+------+
+ *
+ * MF1 - WB or WC load may pass a previous non-conflicting WB, WC or UC store.
+ *       Use MFENCE. This is a combination of rules 'e' and 'i' in the AMD
+ *       diagram.
+ * LF1 - WC load may pass a previous WB, WC or UC load. Use LFENCE. This is
+ *       rule 'b' in the AMD diagram.
+ * SF1 - WB store may pass a previous WC store. Use SFENCE. This is rule 'j' in
+ *       the AMD diagram.
+ * SF2 - WC store may pass a previous UC, WB or non-conflicting WC store. Use
+ *       SFENCE. This is rule 'h' in the AMD diagram.
+ *
+ * To figure out the specific barrier required, pick and collapse the relevant
+ * rows and columns, choosing the strongest barrier.
+ *
+ * SMP barriers only concern access to "normal memory" (write-back cached,
+ * i.e. WB in the above terminology), so we only need to worry about
+ * store-load reordering. In other cases a compiler barrier is sufficient.
+ * SMP store-load reordering is handled with a locked XOR (instead of a proper
+ * MFENCE instruction) for performance reasons. See PR 1674199 for more details.
+ *
+ * DMA barriers are equivalent to SMP barriers on x86.
+ *
+ * MMIO barriers are used to mix accesses to different memory types, so more
+ * reordering is possible, which is handled via LFENCE/SFENCE. Also, a proper
+ * MFENCE must be used instead of the locked XOR trick, due to the latter
+ * not guarding non-temporal/WC accesses.
 */
-
-static INLINE void
-SMP_W_BARRIER_R(void)
-{
-   volatile long temp = 0;
-
-#if defined __GNUC__
-   __asm__ __volatile__ (
-      "lock xorl $1, %0"
-      : "+m" (temp)
-      : /* no additional inputs */
-      : "cc", "memory");
-#elif defined _MSC_VER
-   /*
-    * Ignore warning about _InterlockedXor operation on a local variable; we are
-    * using the operation for its side-effects only.
-    */
-   #pragma warning(suppress:28113)
-   _InterlockedXor(&temp, 1);
-#else
-#error SMP_W_BARRIER_R not defined for this compiler
-#endif
-}
-
 #define SMP_R_BARRIER_R() COMPILER_READ_BARRIER()
 #define SMP_R_BARRIER_W() COMPILER_MEM_BARRIER()
 #define SMP_R_BARRIER_RW() COMPILER_MEM_BARRIER()
+#define SMP_W_BARRIER_R() LOCKED_INSN_BARRIER()
 #define SMP_W_BARRIER_W() COMPILER_WRITE_BARRIER()
-#define SMP_W_BARRIER_RW() SMP_W_BARRIER_R()
-#define SMP_RW_BARRIER_R() SMP_W_BARRIER_R()
+#define SMP_W_BARRIER_RW() LOCKED_INSN_BARRIER()
+#define SMP_RW_BARRIER_R() LOCKED_INSN_BARRIER()
 #define SMP_RW_BARRIER_W() COMPILER_MEM_BARRIER()
-#define SMP_RW_BARRIER_RW() SMP_W_BARRIER_R()
+#define SMP_RW_BARRIER_RW() LOCKED_INSN_BARRIER()
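Concretely, the table collapses to very cheap SMP barriers for WB-only code:
publishing data behind a ready flag needs only compiler barriers, and only a
store-load handshake pays for the locked instruction. A hypothetical
producer/consumer pair (illustrative, not part of this header):

   static int data;
   static volatile int ready;

   static INLINE void
   Publish(int v)
   {
      data = v;
      SMP_W_BARRIER_W();   /* Expands to a plain compiler barrier on x86. */
      ready = 1;
   }

   static INLINE int
   Consume(void)
   {
      while (ready == 0) {
         /* spin */
      }
      SMP_R_BARRIER_R();   /* Also just a compiler barrier on x86. */
      return data;
   }
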
 
 /*
 * Like the above, only for use with observers other than CPUs,
- * i.e. DMA masters.
+ * i.e. DMA masters. Same as SMP barriers for x86.
 */
 
 #define DMA_R_BARRIER_R() SMP_R_BARRIER_R()
@@ -503,18 +627,18 @@ SMP_W_BARRIER_R(void)
 #define DMA_RW_BARRIER_RW() SMP_RW_BARRIER_RW()
 
 /*
- * And finally a set for use with MMIO accesses.
+ * And finally a set for use with MMIO accesses. These barriers must be stronger
+ * because they are used when mixing accesses to different memory types.
 */
 
-#define MMIO_R_BARRIER_R() SMP_R_BARRIER_R()
+#define MMIO_R_BARRIER_R() LFENCE()
 #define MMIO_R_BARRIER_W() SMP_R_BARRIER_W()
-#define MMIO_R_BARRIER_RW() SMP_R_BARRIER_RW()
-#define MMIO_W_BARRIER_R() SMP_W_BARRIER_R()
-#define MMIO_W_BARRIER_W() SMP_W_BARRIER_W()
-#define MMIO_W_BARRIER_RW() SMP_W_BARRIER_RW()
-#define MMIO_RW_BARRIER_R() SMP_RW_BARRIER_R()
-#define MMIO_RW_BARRIER_W() SMP_RW_BARRIER_W()
-#define MMIO_RW_BARRIER_RW() SMP_RW_BARRIER_RW()
-
+#define MMIO_R_BARRIER_RW() LFENCE()
+#define MMIO_W_BARRIER_R() MFENCE()
+#define MMIO_W_BARRIER_W() SFENCE()
+#define MMIO_W_BARRIER_RW() MFENCE()
+#define MMIO_RW_BARRIER_R() MFENCE()
+#define MMIO_RW_BARRIER_W() SFENCE()
+#define MMIO_RW_BARRIER_RW() MFENCE()
 
 #endif // _VM_BASIC_ASM_X86_COMMON_H_
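As a closing illustration of why the MMIO set needs real fences (a
hypothetical device doorbell; the register names and the uint32 typedef from
vm_basic_types.h are assumptions, not part of this patch): a write to a
WC-mapped doorbell followed by a read of a WB status word falls in the
'W WC' row and 'R WB' column of the table above, i.e. MF1, so a full MFENCE
is required; the locked-XOR trick would not order the WC store.

   extern volatile uint32 *doorbell;   /* WC-mapped device register. */
   extern volatile uint32 *status;     /* WB status word updated by the device. */

   static INLINE void
   RingAndPoll(void)
   {
      *doorbell = 1;
      MMIO_W_BARRIER_R();   /* Expands to MFENCE, per the table above. */
      while (*status == 0) {
         /* Wait for the device to acknowledge. */
      }
   }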