#define HAVE__BUILTIN_CTZ 1
_ACEOF
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_popcount" >&5
-$as_echo_n "checking for __builtin_popcount... " >&6; }
-if ${pgac_cv__builtin_popcount+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-int
-call__builtin_popcount(unsigned int x)
-{
- return __builtin_popcount(x);
-}
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- pgac_cv__builtin_popcount=yes
-else
- pgac_cv__builtin_popcount=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_popcount" >&5
-$as_echo "$pgac_cv__builtin_popcount" >&6; }
-if test x"${pgac_cv__builtin_popcount}" = xyes ; then
-
-cat >>confdefs.h <<_ACEOF
-#define HAVE__BUILTIN_POPCOUNT 1
-_ACEOF
-
fi
# __builtin_frame_address may draw a diagnostic for non-constant argument,
# so it needs a different test function.
# We assume that we needn't test all widths of these explicitly:
PGAC_CHECK_BUILTIN_FUNC([__builtin_clz], [unsigned int x])
PGAC_CHECK_BUILTIN_FUNC([__builtin_ctz], [unsigned int x])
-PGAC_CHECK_BUILTIN_FUNC([__builtin_popcount], [unsigned int x])
# __builtin_frame_address may draw a diagnostic for non-constant argument,
# so it needs a different test function.
PGAC_CHECK_BUILTIN_FUNC_PTR([__builtin_frame_address], [0])
'ctz',
'constant_p',
'frame_address',
- 'popcount',
'unreachable',
]
/* Define to 1 if your compiler understands __builtin_$op_overflow. */
#undef HAVE__BUILTIN_OP_OVERFLOW
-/* Define to 1 if your compiler understands __builtin_popcount. */
-#undef HAVE__BUILTIN_POPCOUNT
-
/* Define to 1 if your compiler understands __builtin_types_compatible_p. */
#undef HAVE__BUILTIN_TYPES_COMPATIBLE_P
/*
* pg_popcount32
* Return the number of 1 bits set in word
+ *
+ * Adapted from
+ * https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel.
+ *
+ * Note that newer versions of popular compilers will automatically replace
+ * this with a special popcount instruction if possible, so we don't bother
+ * using builtin functions or intrinsics.
*/
static inline int
pg_popcount32(uint32 word)
{
-#ifdef HAVE__BUILTIN_POPCOUNT
- return __builtin_popcount(word);
-#else /* !HAVE__BUILTIN_POPCOUNT */
- int result = 0;
-
- while (word != 0)
- {
- result += pg_number_of_ones[word & 255];
- word >>= 8;
- }
-
- return result;
-#endif /* HAVE__BUILTIN_POPCOUNT */
+ word -= (word >> 1) & 0x55555555; /* each 2-bit field = bits set in that pair */
+ word = (word & 0x33333333) + ((word >> 2) & 0x33333333); /* each 4-bit field = bits set in that nibble */
+ return (((word + (word >> 4)) & 0xf0f0f0f) * 0x1010101) >> 24; /* per-byte counts; the multiply sums them into the top byte */
}
/*
* pg_popcount64
* Return the number of 1 bits set in word
+ *
+ * Adapted from
+ * https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel.
+ *
+ * Note that newer versions of popular compilers will automatically replace
+ * this with a special popcount instruction if possible, so we don't bother
+ * using builtin functions or intrinsics.
*/
static inline int
pg_popcount64(uint64 word)
{
-#ifdef HAVE__BUILTIN_POPCOUNT
-#if SIZEOF_LONG == 8
- return __builtin_popcountl(word);
-#elif SIZEOF_LONG_LONG == 8
- return __builtin_popcountll(word);
-#else
-#error "cannot find integer of the same size as uint64_t"
-#endif
-#else /* !HAVE__BUILTIN_POPCOUNT */
- int result = 0;
-
- while (word != 0)
- {
- result += pg_number_of_ones[word & 255];
- word >>= 8;
- }
-
- return result;
-#endif /* HAVE__BUILTIN_POPCOUNT */
+ word -= (word >> 1) & UINT64CONST(0x5555555555555555); /* each 2-bit field = bits set in that pair */
+ word = (word & UINT64CONST(0x3333333333333333)) +
+ ((word >> 2) & UINT64CONST(0x3333333333333333)); /* each 4-bit field = bits set in that nibble */
+ word = (word + (word >> 4)) & UINT64CONST(0xf0f0f0f0f0f0f0f); /* each byte = bits set in that byte */
+ return (word * UINT64CONST(0x101010101010101)) >> 56; /* the multiply sums all eight byte counts into the top byte */
}
/*
static inline int
pg_popcount64_neon(uint64 word)
{
- /*
- * For some compilers, __builtin_popcountl() already emits Neon
- * instructions. The line below should compile to the same code on those
- * systems.
- */
return vaddv_u8(vcnt_u8(vld1_u8((const uint8 *) &word)));
}