return pg_leftmost_one_pos64(num - 1) + 1;
}
-extern int pg_popcount32_portable(uint32 word);
-extern int pg_popcount64_portable(uint64 word);
extern uint64 pg_popcount_portable(const char *buf, int bytes);
extern uint64 pg_popcount_masked_portable(const char *buf, int bytes, bits8 mask);
-#ifdef HAVE_X86_64_POPCNTQ
+#if defined(HAVE_X86_64_POPCNTQ) || defined(USE_SVE_POPCNT_WITH_RUNTIME_CHECK)
/*
- * Attempt to use SSE4.2 or AVX-512 instructions, but perform a runtime check
+ * Attempt to use specialized CPU instructions, but perform a runtime check
* first.
*/
-extern PGDLLIMPORT int (*pg_popcount32) (uint32 word);
-extern PGDLLIMPORT int (*pg_popcount64) (uint64 word);
extern PGDLLIMPORT uint64 (*pg_popcount_optimized) (const char *buf, int bytes);
extern PGDLLIMPORT uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask);
-#elif defined(USE_NEON)
-/* Use the Neon version of pg_popcount{32,64} without function pointer. */
-extern int pg_popcount32(uint32 word);
-extern int pg_popcount64(uint64 word);
-
-/*
- * We can try to use an SVE-optimized pg_popcount() on some systems For that,
- * we do use a function pointer.
- */
-#ifdef USE_SVE_POPCNT_WITH_RUNTIME_CHECK
-extern PGDLLIMPORT uint64 (*pg_popcount_optimized) (const char *buf, int bytes);
-extern PGDLLIMPORT uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask);
#else
+/*
+ * No runtime check is needed (portable or Neon-only builds), so these are
+ * plain functions rather than function pointers.
+ */
extern uint64 pg_popcount_optimized(const char *buf, int bytes);
extern uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask);
+
#endif
-#else
-/* Use a portable implementation -- no need for a function pointer. */
-extern int pg_popcount32(uint32 word);
-extern int pg_popcount64(uint64 word);
-extern uint64 pg_popcount_optimized(const char *buf, int bytes);
-extern uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask);
+/*
+ * pg_popcount32
+ * Return the number of 1 bits set in word
+ *
+ * Uses __builtin_popcount() when HAVE__BUILTIN_POPCOUNT is defined.  Otherwise
+ * the count is accumulated one byte at a time from the pg_number_of_ones[]
+ * lookup table, stopping as soon as no set bits remain in word.
+ */
+static inline int
+pg_popcount32(uint32 word)
+{
+#ifdef HAVE__BUILTIN_POPCOUNT
+ return __builtin_popcount(word);
+#else /* !HAVE__BUILTIN_POPCOUNT */
+ int result = 0;
+
+ while (word != 0)
+ {
+ result += pg_number_of_ones[word & 255];
+ word >>= 8;
+ }
+ return result;
+#endif /* HAVE__BUILTIN_POPCOUNT */
+}
+
+/*
+ * pg_popcount64
+ * Return the number of 1 bits set in word
+ *
+ * When HAVE__BUILTIN_POPCOUNT is defined, this picks whichever of
+ * __builtin_popcountl() or __builtin_popcountll() takes an 8-byte argument
+ * (per SIZEOF_LONG / SIZEOF_LONG_LONG) and fails the build if neither does.
+ * Otherwise the count is accumulated one byte at a time from the
+ * pg_number_of_ones[] lookup table.
+ */
+static inline int
+pg_popcount64(uint64 word)
+{
+#ifdef HAVE__BUILTIN_POPCOUNT
+#if SIZEOF_LONG == 8
+ return __builtin_popcountl(word);
+#elif SIZEOF_LONG_LONG == 8
+ return __builtin_popcountll(word);
+#else
+#error "cannot find integer of the same size as uint64_t"
#endif
+#else /* !HAVE__BUILTIN_POPCOUNT */
+ int result = 0;
+
+ while (word != 0)
+ {
+ result += pg_number_of_ones[word & 255];
+ word >>= 8;
+ }
+
+ return result;
+#endif /* HAVE__BUILTIN_POPCOUNT */
+}
/*
* Returns the number of 1-bits in buf.
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
-/*
- * pg_popcount32_portable
- * Return the number of 1 bits set in word
- */
-int
-pg_popcount32_portable(uint32 word)
-{
-#ifdef HAVE__BUILTIN_POPCOUNT
- return __builtin_popcount(word);
-#else /* !HAVE__BUILTIN_POPCOUNT */
- int result = 0;
-
- while (word != 0)
- {
- result += pg_number_of_ones[word & 255];
- word >>= 8;
- }
-
- return result;
-#endif /* HAVE__BUILTIN_POPCOUNT */
-}
-
-/*
- * pg_popcount64_portable
- * Return the number of 1 bits set in word
- */
-int
-pg_popcount64_portable(uint64 word)
-{
-#ifdef HAVE__BUILTIN_POPCOUNT
-#if SIZEOF_LONG == 8
- return __builtin_popcountl(word);
-#elif SIZEOF_LONG_LONG == 8
- return __builtin_popcountll(word);
-#else
-#error "cannot find integer of the same size as uint64_t"
-#endif
-#else /* !HAVE__BUILTIN_POPCOUNT */
- int result = 0;
-
- while (word != 0)
- {
- result += pg_number_of_ones[word & 255];
- word >>= 8;
- }
-
- return result;
-#endif /* HAVE__BUILTIN_POPCOUNT */
-}
-
/*
* pg_popcount_portable
* Returns the number of 1-bits in buf
while (bytes >= 8)
{
- popcnt += pg_popcount64_portable(*words++);
+ popcnt += pg_popcount64(*words++);
bytes -= 8;
}
while (bytes >= 8)
{
- popcnt += pg_popcount64_portable(*words++ & maskv);
+ popcnt += pg_popcount64(*words++ & maskv);
bytes -= 8;
}
* actual external functions. The compiler should be able to inline the
* portable versions here.
*/
-int
-pg_popcount32(uint32 word)
-{
- return pg_popcount32_portable(word);
-}
-
-int
-pg_popcount64(uint64 word)
-{
- return pg_popcount64_portable(word);
-}
/*
* pg_popcount_optimized
#endif /* ! USE_SVE_POPCNT_WITH_RUNTIME_CHECK */
/*
- * pg_popcount32
+ * pg_popcount64_neon
* Return number of 1 bits in word
*/
-int
-pg_popcount32(uint32 word)
-{
- return pg_popcount64((uint64) word);
-}
-
-/*
- * pg_popcount64
- * Return number of 1 bits in word
- */
-int
-pg_popcount64(uint64 word)
+static inline int
+pg_popcount64_neon(uint64 word)
{
/*
* For some compilers, __builtin_popcountl() already emits Neon
*/
for (; bytes >= sizeof(uint64); bytes -= sizeof(uint64))
{
- popcnt += pg_popcount64(*((const uint64 *) buf));
+ popcnt += pg_popcount64_neon(*((const uint64 *) buf));
buf += sizeof(uint64);
}
*/
for (; bytes >= sizeof(uint64); bytes -= sizeof(uint64))
{
- popcnt += pg_popcount64(*((const uint64 *) buf) & mask64);
+ popcnt += pg_popcount64_neon(*((const uint64 *) buf) & mask64);
buf += sizeof(uint64);
}
* operation, but in practice this is close enough, and "sse42" seems easier to
* follow than "popcnt" for these names.
*/
-static inline int pg_popcount32_sse42(uint32 word);
-static inline int pg_popcount64_sse42(uint64 word);
static uint64 pg_popcount_sse42(const char *buf, int bytes);
static uint64 pg_popcount_masked_sse42(const char *buf, int bytes, bits8 mask);
* what the current CPU supports) and then will call the pointer to fulfill the
* caller's request.
*/
-static int pg_popcount32_choose(uint32 word);
-static int pg_popcount64_choose(uint64 word);
static uint64 pg_popcount_choose(const char *buf, int bytes);
static uint64 pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask);
-int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
-int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
uint64 (*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose;
uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask) = pg_popcount_masked_choose;
#endif /* USE_AVX512_POPCNT_WITH_RUNTIME_CHECK */
/*
- * These functions get called on the first call to pg_popcount32 etc.
+ * These functions get called on the first call via the function pointers.
* They detect whether we can use the asm implementations, and replace
* the function pointers so that subsequent calls are routed directly to
* the chosen implementation.
{
if (pg_popcount_sse42_available())
{
- pg_popcount32 = pg_popcount32_sse42;
- pg_popcount64 = pg_popcount64_sse42;
pg_popcount_optimized = pg_popcount_sse42;
pg_popcount_masked_optimized = pg_popcount_masked_sse42;
}
else
{
- pg_popcount32 = pg_popcount32_portable;
- pg_popcount64 = pg_popcount64_portable;
pg_popcount_optimized = pg_popcount_portable;
pg_popcount_masked_optimized = pg_popcount_masked_portable;
}
#endif
}
-static int
-pg_popcount32_choose(uint32 word)
-{
- choose_popcount_functions();
- return pg_popcount32(word);
-}
-
-static int
-pg_popcount64_choose(uint64 word)
-{
- choose_popcount_functions();
- return pg_popcount64(word);
-}
-
static uint64
pg_popcount_choose(const char *buf, int bytes)
{
#endif /* USE_AVX512_POPCNT_WITH_RUNTIME_CHECK */
-/*
- * pg_popcount32_sse42
- * Return the number of 1 bits set in word
- */
-static inline int
-pg_popcount32_sse42(uint32 word)
-{
-#ifdef _MSC_VER
- return __popcnt(word);
-#else
- uint32 res;
-
-__asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc");
- return (int) res;
-#endif
-}
-
/*
* pg_popcount64_sse42
* Return the number of 1 bits set in word