--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * pg_cpu.h
+ * Runtime CPU feature detection
+ *
+ * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/port/pg_cpu.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_CPU_H
+#define PG_CPU_H
+
+#if defined(USE_SSE2) || defined(__i386__)
+
+typedef enum X86FeatureId
+{
+ /* Have we run feature detection? */
+ INIT_PG_X86,
+
+ /* scalar registers and 128-bit XMM registers */
+ PG_SSE4_2,
+ PG_POPCNT,
+
+ /* 512-bit ZMM registers */
+ PG_AVX512_BW,
+ PG_AVX512_VL,
+ PG_AVX512_VPCLMULQDQ,
+ PG_AVX512_VPOPCNTDQ,
+} X86FeatureId;
+#define X86FeaturesSize (PG_AVX512_VPOPCNTDQ + 1)
+
+extern PGDLLIMPORT bool X86Features[];
+
+extern void set_x86_features(void);
+
+static inline bool
+x86_feature_available(X86FeatureId feature)
+{
+ if (X86Features[INIT_PG_X86] == false)
+ set_x86_features();
+
+ return X86Features[feature];
+}
+
+#endif /* defined(USE_SSE2) || defined(__i386__) */
+
+#endif /* PG_CPU_H */
/*-------------------------------------------------------------------------
*
* pg_cpu_x86.c
- * Choose between Intel SSE 4.2 and software CRC-32C implementation.
- *
- * On first call, checks if the CPU we're running on supports Intel SSE
- * 4.2. If it does, use the special SSE instructions for CRC-32C
- * computation. Otherwise, fall back to the pure software implementation
- * (slicing-by-8).
+ * Runtime CPU feature detection for x86
*
* Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
#include <immintrin.h>
#endif
-#include "port/pg_crc32c.h"
+#include "port/pg_cpu.h"
+
-#ifndef USE_SLICING_BY_8_CRC32C
+/* array indexed by enum X86FeatureId */
+bool X86Features[X86FeaturesSize] = {0};
/*
* Does XGETBV say the ZMM registers are enabled?
}
/*
- * This gets called on the first call. It replaces the function pointer
- * so that subsequent calls are routed directly to the chosen implementation.
+ * Parse the CPU ID info for runtime checks.
*/
-static pg_crc32c
-pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len)
+void
+set_x86_features(void)
{
unsigned int exx[4] = {0, 0, 0, 0};
- /*
- * Set fallback. We must guard since slicing-by-8 is not visible
- * everywhere.
- */
-#ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK
- pg_comp_crc32c = pg_comp_crc32c_sb8;
-#endif
-
#if defined(HAVE__GET_CPUID)
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
#elif defined(HAVE__CPUID)
#error cpuid instruction not available
#endif
- if ((exx[2] & (1 << 20)) != 0) /* SSE 4.2 */
- {
- pg_comp_crc32c = pg_comp_crc32c_sse42;
+ X86Features[PG_SSE4_2] = exx[2] >> 20 & 1;
+ X86Features[PG_POPCNT] = exx[2] >> 23 & 1;
- if (exx[2] & (1 << 27) && /* OSXSAVE */
- zmm_regs_available())
- {
- /* second cpuid call on leaf 7 to check extended AVX-512 support */
+ /* All these features depend on OSXSAVE */
+ if (exx[2] & (1 << 27))
+ {
+ /* second cpuid call on leaf 7 to check extended AVX-512 support */
- memset(exx, 0, 4 * sizeof(exx[0]));
+ memset(exx, 0, 4 * sizeof(exx[0]));
#if defined(HAVE__GET_CPUID_COUNT)
- __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
+ __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
#elif defined(HAVE__CPUIDEX)
- __cpuidex(exx, 7, 0);
+ __cpuidex(exx, 7, 0);
#endif
-#ifdef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK
- if (exx[2] & (1 << 10) && /* VPCLMULQDQ */
- exx[1] & (1 << 31)) /* AVX512-VL */
- pg_comp_crc32c = pg_comp_crc32c_avx512;
-#endif
+ if (zmm_regs_available())
+ {
+ X86Features[PG_AVX512_BW] = exx[1] >> 30 & 1;
+ X86Features[PG_AVX512_VL] = exx[1] >> 31 & 1;
+
+ X86Features[PG_AVX512_VPCLMULQDQ] = exx[2] >> 10 & 1;
+ X86Features[PG_AVX512_VPOPCNTDQ] = exx[2] >> 14 & 1;
}
}
- return pg_comp_crc32c(crc, data, len);
+ X86Features[INIT_PG_X86] = true;
}
-pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose;
-
-#endif
-
#endif /* defined(USE_SSE2) || defined(__i386__) */
#include <immintrin.h>
#endif
+#include "port/pg_cpu.h"
#include "port/pg_crc32c.h"
+static pg_crc32c pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len);
+
pg_attribute_no_sanitize_alignment()
pg_attribute_target("sse4.2")
pg_crc32c
return pg_comp_crc32c_sse42(crc0, buf, len);
}
+#endif /* USE_AVX512_CRC32C_WITH_RUNTIME_CHECK */
+
+/*
+ * This gets called on the first call. It replaces the function pointer
+ * so that subsequent calls are routed directly to the chosen implementation.
+ */
+static pg_crc32c
+pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len)
+{
+ /*
+ * Set fallback. We must guard since slicing-by-8 is not visible
+ * everywhere.
+ */
+#ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK
+ pg_comp_crc32c = pg_comp_crc32c_sb8;
+#endif
+
+ if (x86_feature_available(PG_SSE4_2))
+ pg_comp_crc32c = pg_comp_crc32c_sse42;
+
+#ifdef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK
+ if (x86_feature_available(PG_AVX512_VL) &&
+ x86_feature_available(PG_AVX512_VPCLMULQDQ))
+ pg_comp_crc32c = pg_comp_crc32c_avx512;
#endif
+
+ return pg_comp_crc32c(crc, data, len);
+}
+
+pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose;
#ifdef HAVE_X86_64_POPCNTQ
-#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
-#include <cpuid.h>
-#endif
-
#ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
#include <immintrin.h>
#endif
-#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
-#include <intrin.h>
-#endif
-
#include "port/pg_bitutils.h"
+#include "port/pg_cpu.h"
/*
* The SSE4.2 versions are built regardless of whether we are building the
uint64 (*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose;
uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask) = pg_popcount_masked_choose;
-/*
- * Return true if CPUID indicates that the POPCNT instruction is available.
- */
-static bool
-pg_popcount_sse42_available(void)
-{
- unsigned int exx[4] = {0, 0, 0, 0};
-
-#if defined(HAVE__GET_CPUID)
- __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUID)
- __cpuid(exx, 1);
-#else
-#error cpuid instruction not available
-#endif
-
- return (exx[2] & (1 << 23)) != 0; /* POPCNT */
-}
#ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
-/*
- * Does CPUID say there's support for XSAVE instructions?
- */
-static inline bool
-xsave_available(void)
-{
- unsigned int exx[4] = {0, 0, 0, 0};
-
-#if defined(HAVE__GET_CPUID)
- __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUID)
- __cpuid(exx, 1);
-#else
-#error cpuid instruction not available
-#endif
- return (exx[2] & (1 << 27)) != 0; /* osxsave */
-}
-
-/*
- * Does XGETBV say the ZMM registers are enabled?
- *
- * NB: Caller is responsible for verifying that xsave_available() returns true
- * before calling this.
- */
-#ifdef HAVE_XSAVE_INTRINSICS
-pg_attribute_target("xsave")
-#endif
-static inline bool
-zmm_regs_available(void)
-{
-#ifdef HAVE_XSAVE_INTRINSICS
- return (_xgetbv(0) & 0xe6) == 0xe6;
-#else
- return false;
-#endif
-}
-
-/*
- * Does CPUID say there's support for AVX-512 popcount and byte-and-word
- * instructions?
- */
-static inline bool
-avx512_popcnt_available(void)
-{
- unsigned int exx[4] = {0, 0, 0, 0};
-
-#if defined(HAVE__GET_CPUID_COUNT)
- __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUIDEX)
- __cpuidex(exx, 7, 0);
-#else
-#error cpuid instruction not available
-#endif
- return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */
- (exx[1] & (1 << 30)) != 0; /* avx512-bw */
-}
-
/*
* Returns true if the CPU supports the instructions required for the AVX-512
* pg_popcount() implementation.
static bool
pg_popcount_avx512_available(void)
{
- return xsave_available() &&
- zmm_regs_available() &&
- avx512_popcnt_available();
+ return x86_feature_available(PG_AVX512_BW) &&
+ x86_feature_available(PG_AVX512_VPOPCNTDQ);
}
#endif /* USE_AVX512_POPCNT_WITH_RUNTIME_CHECK */
static inline void
choose_popcount_functions(void)
{
- if (pg_popcount_sse42_available())
+ if (x86_feature_available(PG_POPCNT))
{
pg_popcount_optimized = pg_popcount_sse42;
pg_popcount_masked_optimized = pg_popcount_masked_sse42;
X509_NAME_ENTRY
X509_STORE
X509_STORE_CTX
+X86FeatureId
XLTW_Oper
XLogCtlData
XLogCtlInsert