__GET_EAX_FROM_CPUID(1), \
X86MSR_GetMSR(MSR_BIOS_SIGN_ID))
+
/*
*-----------------------------------------------------------------------------
*
- * RDTSC_BARRIER --
- *
- * Implements an RDTSC fence. Instructions executed prior to the
- * fence will have completed before the fence and all stores to
- * memory are flushed from the store buffer.
+ * MFENCE --
*
- * On AMD, MFENCE is sufficient. On Intel, only LFENCE is
- * documented to fence RDTSC, but LFENCE won't drain the store
- * buffer. So, use MFENCE;LFENCE, which will work on both AMD and
- * Intel.
+ * Wrapper around the MFENCE instruction.
*
- * It is the callers' responsibility to check for SSE2 before
- * calling this function.
+ * Caveat Emptor! This function is _NOT_ _PORTABLE_ and most certainly
+ * not something you should use. Take a look at the SMP_*_BARRIER_*,
+ * DMA_*_BARRIER_* and MMIO_*_BARRIER_* interfaces instead, when writing
+ * general OS/VMM code.
*
* Results:
* None.
*
* Side effects:
- * Cause loads and stores prior to this to be globally visible, and
- * RDTSC will not pass.
+ * See MFENCE instruction in Intel SDM or AMD Programmer's Manual.
*
*-----------------------------------------------------------------------------
*/
-static INLINE void
-RDTSC_BARRIER(void)
-{
-#ifdef __GNUC__
- __asm__ __volatile__(
- "mfence \n\t"
- "lfence \n\t"
- ::: "memory"
- );
-#elif defined _MSC_VER
- /* Prevent compiler from moving code across mfence/lfence. */
- _ReadWriteBarrier();
- _mm_mfence();
- _mm_lfence();
- _ReadWriteBarrier();
-#else
-#error No compiler defined for RDTSC_BARRIER
-#endif
-}
-
static INLINE void
MFENCE(void)
{
#ifdef __GNUC__
   __asm__ __volatile__(
      "mfence \n\t"
      ::: "memory"
   );
#elif defined _MSC_VER
   /* Prevent the compiler from moving code across the fence. */
   _ReadWriteBarrier();
   _mm_mfence();
   _ReadWriteBarrier();
#else
#error MFENCE not defined for this compiler
#endif
}

+/*
+ *-----------------------------------------------------------------------------
+ *
+ * LFENCE --
+ *
+ * Wrapper around the LFENCE instruction.
+ *
+ * Caveat Emptor! This function is _NOT_ _PORTABLE_ and most certainly
+ * not something you should use. Take a look at the SMP_*_BARRIER_*,
+ * DMA_*_BARRIER_* and MMIO_*_BARRIER_* interfaces instead, when writing
+ * general OS/VMM code.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * See LFENCE instruction in Intel SDM or AMD Programmer's Manual.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
static INLINE void
LFENCE(void)
{
#ifdef __GNUC__
   __asm__ __volatile__(
      "lfence \n\t"
      ::: "memory"
   );
#elif defined _MSC_VER
   /* Prevent the compiler from moving code across the fence. */
   _ReadWriteBarrier();
   _mm_lfence();
   _ReadWriteBarrier();
#else
#error LFENCE not defined for this compiler
#endif
}

+/*
+ *-----------------------------------------------------------------------------
+ *
+ * SFENCE --
+ *
+ * Wrapper around the SFENCE instruction.
+ *
+ * Caveat Emptor! This function is _NOT_ _PORTABLE_ and most certainly
+ * not something you should use. Take a look at the SMP_*_BARRIER_*,
+ * DMA_*_BARRIER_* and MMIO_*_BARRIER_* interfaces instead, when writing
+ * general OS/VMM code.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * See SFENCE instruction in Intel SDM or AMD Programmer's Manual.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
static INLINE void
SFENCE(void)
{
#ifdef __GNUC__
   __asm__ __volatile__(
      "sfence \n\t"
      ::: "memory"
   );
#elif defined _MSC_VER
   /* Prevent the compiler from moving code across the fence. */
   _ReadWriteBarrier();
   _mm_sfence();
   _ReadWriteBarrier();
#else
#error SFENCE not defined for this compiler
#endif
}

+/*
+ *-----------------------------------------------------------------------------
+ *
+ * RDTSC_BARRIER --
+ *
+ * Implements an RDTSC fence. Instructions executed prior to the
+ * fence will have completed before the fence and all stores to
+ * memory are flushed from the store buffer.
+ *
+ * On AMD, MFENCE is sufficient. On Intel, only LFENCE is
+ * documented to fence RDTSC, but LFENCE won't drain the store
+ * buffer. So, use MFENCE;LFENCE, which will work on both AMD and
+ * Intel.
+ *
+ * It is the caller's responsibility to check for SSE2 before
+ * calling this function.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Causes loads and stores prior to this call to be globally visible,
+ * and prevents RDTSC from being reordered ahead of the fence.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+static INLINE void
+RDTSC_BARRIER(void)
+{
+ MFENCE();
+ LFENCE();
+}
+
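+/*
+ * Illustrative use, assuming the RDTSC() helper from the surrounding
+ * vm_basic_asm headers (a sketch, not a prescription):
+ *
+ *    uint64 tsc;
+ *
+ *    ... workload being timed ...
+ *    RDTSC_BARRIER(); // prior loads/stores complete; RDTSC cannot hoist
+ *    tsc = RDTSC();   // the timestamp cannot be taken early
+ */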
+
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * LOCKED_INSN_BARRIER --
+ *
+ * Implements a full WB load/store barrier using a locked instruction.
+ *
+ * See PR 1674199 for details. You may choose to use this for
+ * performance reasons over MFENCE iff you are only dealing with
+ * WB memory accesses.
+ *
+ * DANGER! Do not use this barrier instead of MFENCE when dealing
+ * with non-temporal instructions or UC/WC memory accesses.
+ *
+ * Caveat Emptor! This function is _NOT_ _PORTABLE_ and most certainly
+ * not something you should use. Take a look at the SMP_*_BARRIER_*,
+ * DMA_*_BARRIER_* and MMIO_*_BARRIER_* interfaces instead, when writing
+ * general OS/VMM code.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * Causes WB loads and stores before the call to be globally visible
+ * before WB loads and stores after the call.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+static INLINE void
+LOCKED_INSN_BARRIER(void)
+{
+ volatile long temp = 0;
+
+#if defined __GNUC__
+ __asm__ __volatile__ (
+ "lock xorl $1, %0"
+ : "+m" (temp)
+ : /* no additional inputs */
+ : "cc", "memory");
+#elif defined _MSC_VER
+ /*
+ * Ignore warning about _InterlockedXor operation on a local variable; we are
+ * using the operation for its side-effects only.
+ */
+ #pragma warning(suppress:28113)
+ _InterlockedXor(&temp, 1);
+#else
+#error LOCKED_INSN_BARRIER not defined for this compiler
+#endif
+}
+
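+/*
+ * Illustrative use (sketch only; flagA/flagB are hypothetical): a
+ * Dekker-style handshake, where each thread's store must become globally
+ * visible before its subsequent load. Store-load is the one reordering
+ * x86 permits on WB memory, so a compiler barrier is not enough here:
+ *
+ *    // Thread A                  // Thread B
+ *    flagA = 1;                   flagB = 1;
+ *    LOCKED_INSN_BARRIER();       LOCKED_INSN_BARRIER();
+ *    if (flagB == 0) { ... }      if (flagA == 0) { ... }
+ *
+ * Without the barrier, each load could pass its own thread's store and
+ * both threads could observe 0.
+ */
+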
/*
* Memory Barriers
* ===============
# define COMPILER_MEM_BARRIER() _ReadWriteBarrier()
#endif
-
/*
* Memory barriers. These take the form of
*
*
* Thanks for pasting this whole comment into every architecture header.
*
- * On x86, we only need to care specifically about store-load reordering on
- * normal memory types. In other cases, only a compiler barrier is needed.
- * SMP_W_BARRIER_R is implemented with a locked xor operation (instead of the
- * mfence instruction) for performance reasons. See PR 1674199 for more
- * details.
- *
- * On x64, special instructions are only provided for load-load (lfence) and
- * store-store (sfence) ordering, and they don't apply to normal memory.
+ * This is a simplified version of Table 7-3 (Memory Access Ordering Rules) from
+ * AMD AMD64 Architecture Programmer's Manual Volume 2: System Programming
+ * (September 2018, Publication 24593, Revision 3.30).
+ *
+ * https://www.amd.com/system/files/TechDocs/24593.pdf#page=228
+ *
+ * This table only includes the memory types we care about in the context of
+ * SMP, DMA and MMIO barriers.
+ *
+ * +-------------+------+------+------+------+------+------+
+ * |\ 2nd mem op | | | | | | |
+ * | `---------. | R WB | R UC | R WC | W WB | W UC | W WC |
+ * | 1st mem op \| | | | | | |
+ * +-------------+------+------+------+------+------+------+
+ * | R WB | | | LF1 | | | |
+ * +-------------+------+------+------+------+------+------+
+ * | R UC | | | LF1 | | | |
+ * +-------------+------+------+------+------+------+------+
+ * | R WC | | | LF1 | | | |
+ * +-------------+------+------+------+------+------+------+
+ * | W WB | MF1 | | MF1 | | | SF2 |
+ * +-------------+------+------+------+------+------+------+
+ * | W UC | MF1 | | MF1 | | | SF2 |
+ * +-------------+------+------+------+------+------+------+
+ * | W WC | MF1 | | MF1 | SF1 | | SF2 |
+ * +-------------+------+------+------+------+------+------+
+ *
+ * MF1 - WB or WC load may pass a previous non-conflicting WB, WC or UC store.
+ * Use MFENCE. This is a combination of rules 'e' and 'i' in the AMD
+ * diagram.
+ * LF1 - WC load may pass a previous WB, WC or UC load. Use LFENCE. This is
+ * rule 'b' in the AMD diagram.
+ * SF1 - WB store may pass a previous WC store. Use SFENCE. This is rule 'j' in
+ * the AMD diagram.
+ * SF2 - WC store may pass a previous UC, WB or non-conflicting WC store. Use
+ * SFENCE. This is rule 'h' in the AMD diagram.
+ *
+ * To figure out the specific barrier required, pick and collapse the relevant
+ * rows and columns, choosing the strongest barrier.
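+ *
+ * For example, MMIO_W_BARRIER_R must order any prior write (rows W WB,
+ * W UC and W WC) before any later read (columns R WB, R UC and R WC).
+ * Collapsing those rows and columns, the strongest entry is MF1, so a
+ * full MFENCE is required; likewise MMIO_W_BARRIER_W collapses to
+ * SF1/SF2 (SFENCE), and MMIO_R_BARRIER_R to LF1 (LFENCE).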
+ *
+ * SMP barriers only concern accesses to "normal memory" (write-back cached,
+ * i.e. WB in the terminology above), so only store-load reordering needs a
+ * real fence; in all other cases a compiler barrier is sufficient. SMP
+ * store-load reordering is handled with a locked XOR (instead of a proper
+ * MFENCE instruction) for performance reasons. See PR 1674199 for more
+ * details.
+ *
+ * DMA barriers are equivalent to SMP barriers on x86.
+ *
+ * MMIO barriers are used when mixing accesses to different memory types, so
+ * more reorderings are possible; these are handled via LFENCE/SFENCE. Also,
+ * a proper MFENCE must be used instead of the locked-XOR trick, because the
+ * latter does not order non-temporal/WC accesses.
*/
-
-static INLINE void
-SMP_W_BARRIER_R(void)
-{
- volatile long temp = 0;
-
-#if defined __GNUC__
- __asm__ __volatile__ (
- "lock xorl $1, %0"
- : "+m" (temp)
- : /* no additional inputs */
- : "cc", "memory");
-#elif defined _MSC_VER
- /*
- * Ignore warning about _InterlockedXor operation on a local variable; we are
- * using the operation for its side-effects only.
- */
- #pragma warning(suppress:28113)
- _InterlockedXor(&temp, 1);
-#else
-#error SMP_W_BARRIER_R not defined for this compiler
-#endif
-}
-
#define SMP_R_BARRIER_R() COMPILER_READ_BARRIER()
#define SMP_R_BARRIER_W() COMPILER_MEM_BARRIER()
#define SMP_R_BARRIER_RW() COMPILER_MEM_BARRIER()
+#define SMP_W_BARRIER_R() LOCKED_INSN_BARRIER()
#define SMP_W_BARRIER_W() COMPILER_WRITE_BARRIER()
-#define SMP_W_BARRIER_RW() SMP_W_BARRIER_R()
-#define SMP_RW_BARRIER_R() SMP_W_BARRIER_R()
+#define SMP_W_BARRIER_RW() LOCKED_INSN_BARRIER()
+#define SMP_RW_BARRIER_R() LOCKED_INSN_BARRIER()
#define SMP_RW_BARRIER_W() COMPILER_MEM_BARRIER()
-#define SMP_RW_BARRIER_RW() SMP_W_BARRIER_R()
+#define SMP_RW_BARRIER_RW() LOCKED_INSN_BARRIER()
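+
+/*
+ * Illustrative use of the SMP barriers (sketch only; data/ready/use are
+ * hypothetical): publishing a value to another CPU. Both sides need only
+ * compiler barriers on x86:
+ *
+ *    // Producer                  // Consumer
+ *    data = 42;                   while (ready == 0) { }
+ *    SMP_W_BARRIER_W();           SMP_R_BARRIER_R();
+ *    ready = 1;                   use(data);
+ */
+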
/*
* Like the above, only for use with observers other than CPUs,
- * i.e. DMA masters.
+ * i.e. DMA masters. On x86 these are the same as the SMP barriers.
*/
#define DMA_R_BARRIER_R() SMP_R_BARRIER_R()
#define DMA_RW_BARRIER_RW() SMP_RW_BARRIER_RW()
/*
- * And finally a set for use with MMIO accesses.
+ * And finally a set for use with MMIO accesses. These barriers must be stronger
+ * because they are used when mixing accesses to different memory types.
*/
-#define MMIO_R_BARRIER_R() SMP_R_BARRIER_R()
+#define MMIO_R_BARRIER_R() LFENCE()
#define MMIO_R_BARRIER_W() SMP_R_BARRIER_W()
-#define MMIO_R_BARRIER_RW() SMP_R_BARRIER_RW()
-#define MMIO_W_BARRIER_R() SMP_W_BARRIER_R()
-#define MMIO_W_BARRIER_W() SMP_W_BARRIER_W()
-#define MMIO_W_BARRIER_RW() SMP_W_BARRIER_RW()
-#define MMIO_RW_BARRIER_R() SMP_RW_BARRIER_R()
-#define MMIO_RW_BARRIER_W() SMP_RW_BARRIER_W()
-#define MMIO_RW_BARRIER_RW() SMP_RW_BARRIER_RW()
-
+#define MMIO_R_BARRIER_RW() LFENCE()
+#define MMIO_W_BARRIER_R() MFENCE()
+#define MMIO_W_BARRIER_W() SFENCE()
+#define MMIO_W_BARRIER_RW() MFENCE()
+#define MMIO_RW_BARRIER_R() MFENCE()
+#define MMIO_RW_BARRIER_W() SFENCE()
+#define MMIO_RW_BARRIER_RW() MFENCE()
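+
+/*
+ * Illustrative use (sketch only; names are hypothetical): after streaming
+ * a payload through a write-combining mapping, a later WB store that
+ * publishes the buffer may pass the WC stores (rule SF1 above), so an
+ * SFENCE is needed in between:
+ *
+ *    CopyToWcBuffer(wcBuf, src, len); // stores through a WC mapping
+ *    MMIO_W_BARRIER_W();              // SFENCE: WC payload before publish
+ *    desc->ready = 1;                 // WB store ordered after payload
+ */
+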
#endif // _VM_BASIC_ASM_X86_COMMON_H_