git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Remove specialized word-length popcount implementations.
author Nathan Bossart <nathan@postgresql.org>
Thu, 12 Feb 2026 17:32:49 +0000 (11:32 -0600)
committer Nathan Bossart <nathan@postgresql.org>
Thu, 12 Feb 2026 17:32:49 +0000 (11:32 -0600)
The uses of these functions do not justify the level of
micro-optimization we've done and may even hurt performance in some
cases (e.g., due to using function pointers).  This commit removes
all architecture-specific implementations of pg_popcount{32,64} and
converts the portable ones to inlined functions in pg_bitutils.h.
These inlined versions should produce the same code as before (but
inlined), so in theory this is a net gain for many machines.  A
follow-up commit will replace the remaining loops over these
word-length popcount functions with calls to pg_popcount(), further
reducing the need for architecture-specific implementations.

Suggested-by: John Naylor <johncnaylorls@gmail.com>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Reviewed-by: Greg Burd <greg@burd.me>
Discussion: https://postgr.es/m/CANWCAZY7R%2Biy%2Br9YM_sySNydHzNqUirx1xk0tB3ej5HO62GdgQ%40mail.gmail.com

src/include/port/pg_bitutils.h
src/port/pg_bitutils.c
src/port/pg_popcount_aarch64.c
src/port/pg_popcount_x86.c

index 20c11b79c61e6ea12b9dbd0cbe3a94eacb9147c5..789663edd93040dedbab8a73341437a4b727d13d 100644 (file)
@@ -276,46 +276,73 @@ pg_ceil_log2_64(uint64 num)
                return pg_leftmost_one_pos64(num - 1) + 1;
 }
 
-extern int     pg_popcount32_portable(uint32 word);
-extern int     pg_popcount64_portable(uint64 word);
 extern uint64 pg_popcount_portable(const char *buf, int bytes);
 extern uint64 pg_popcount_masked_portable(const char *buf, int bytes, bits8 mask);
 
-#ifdef HAVE_X86_64_POPCNTQ
+#if defined(HAVE_X86_64_POPCNTQ) || defined(USE_SVE_POPCNT_WITH_RUNTIME_CHECK)
 /*
- * Attempt to use SSE4.2 or AVX-512 instructions, but perform a runtime check
+ * Attempt to use specialized CPU instructions, but perform a runtime check
  * first.
  */
-extern PGDLLIMPORT int (*pg_popcount32) (uint32 word);
-extern PGDLLIMPORT int (*pg_popcount64) (uint64 word);
 extern PGDLLIMPORT uint64 (*pg_popcount_optimized) (const char *buf, int bytes);
 extern PGDLLIMPORT uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask);
 
-#elif defined(USE_NEON)
-/* Use the Neon version of pg_popcount{32,64} without function pointer. */
-extern int     pg_popcount32(uint32 word);
-extern int     pg_popcount64(uint64 word);
-
-/*
- * We can try to use an SVE-optimized pg_popcount() on some systems  For that,
- * we do use a function pointer.
- */
-#ifdef USE_SVE_POPCNT_WITH_RUNTIME_CHECK
-extern PGDLLIMPORT uint64 (*pg_popcount_optimized) (const char *buf, int bytes);
-extern PGDLLIMPORT uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask);
 #else
+/* Use a portable implementation -- no need for a function pointer. */
 extern uint64 pg_popcount_optimized(const char *buf, int bytes);
 extern uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask);
+
 #endif
 
-#else
-/* Use a portable implementation -- no need for a function pointer. */
-extern int     pg_popcount32(uint32 word);
-extern int     pg_popcount64(uint64 word);
-extern uint64 pg_popcount_optimized(const char *buf, int bytes);
-extern uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask);
+/*
+ * pg_popcount32
+ *             Return the number of 1 bits set in word
+ */
+static inline int
+pg_popcount32(uint32 word)
+{
+#ifdef HAVE__BUILTIN_POPCOUNT
+       return __builtin_popcount(word);
+#else                                                  /* !HAVE__BUILTIN_POPCOUNT */
+       int                     result = 0;
+
+       while (word != 0)
+       {
+               result += pg_number_of_ones[word & 255];
+               word >>= 8;
+       }
 
+       return result;
+#endif                                                 /* HAVE__BUILTIN_POPCOUNT */
+}
+
+/*
+ * pg_popcount64
+ *             Return the number of 1 bits set in word
+ */
+static inline int
+pg_popcount64(uint64 word)
+{
+#ifdef HAVE__BUILTIN_POPCOUNT
+#if SIZEOF_LONG == 8
+       return __builtin_popcountl(word);
+#elif SIZEOF_LONG_LONG == 8
+       return __builtin_popcountll(word);
+#else
+#error "cannot find integer of the same size as uint64_t"
 #endif
+#else                                                  /* !HAVE__BUILTIN_POPCOUNT */
+       int                     result = 0;
+
+       while (word != 0)
+       {
+               result += pg_number_of_ones[word & 255];
+               word >>= 8;
+       }
+
+       return result;
+#endif                                                 /* HAVE__BUILTIN_POPCOUNT */
+}
 
 /*
  * Returns the number of 1-bits in buf.
index bec06c06fc36dd1ad927efdd736e389555c15aef..49b130f1306687d5b9bf2ca2bd68e766efe53cfa 100644 (file)
@@ -96,56 +96,6 @@ const uint8 pg_number_of_ones[256] = {
        4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
 };
 
-/*
- * pg_popcount32_portable
- *             Return the number of 1 bits set in word
- */
-int
-pg_popcount32_portable(uint32 word)
-{
-#ifdef HAVE__BUILTIN_POPCOUNT
-       return __builtin_popcount(word);
-#else                                                  /* !HAVE__BUILTIN_POPCOUNT */
-       int                     result = 0;
-
-       while (word != 0)
-       {
-               result += pg_number_of_ones[word & 255];
-               word >>= 8;
-       }
-
-       return result;
-#endif                                                 /* HAVE__BUILTIN_POPCOUNT */
-}
-
-/*
- * pg_popcount64_portable
- *             Return the number of 1 bits set in word
- */
-int
-pg_popcount64_portable(uint64 word)
-{
-#ifdef HAVE__BUILTIN_POPCOUNT
-#if SIZEOF_LONG == 8
-       return __builtin_popcountl(word);
-#elif SIZEOF_LONG_LONG == 8
-       return __builtin_popcountll(word);
-#else
-#error "cannot find integer of the same size as uint64_t"
-#endif
-#else                                                  /* !HAVE__BUILTIN_POPCOUNT */
-       int                     result = 0;
-
-       while (word != 0)
-       {
-               result += pg_number_of_ones[word & 255];
-               word >>= 8;
-       }
-
-       return result;
-#endif                                                 /* HAVE__BUILTIN_POPCOUNT */
-}
-
 /*
  * pg_popcount_portable
  *             Returns the number of 1-bits in buf
@@ -163,7 +113,7 @@ pg_popcount_portable(const char *buf, int bytes)
 
                while (bytes >= 8)
                {
-                       popcnt += pg_popcount64_portable(*words++);
+                       popcnt += pg_popcount64(*words++);
                        bytes -= 8;
                }
 
@@ -197,7 +147,7 @@ pg_popcount_masked_portable(const char *buf, int bytes, bits8 mask)
 
                while (bytes >= 8)
                {
-                       popcnt += pg_popcount64_portable(*words++ & maskv);
+                       popcnt += pg_popcount64(*words++ & maskv);
                        bytes -= 8;
                }
 
@@ -220,17 +170,6 @@ pg_popcount_masked_portable(const char *buf, int bytes, bits8 mask)
  * actual external functions.  The compiler should be able to inline the
  * portable versions here.
  */
-int
-pg_popcount32(uint32 word)
-{
-       return pg_popcount32_portable(word);
-}
-
-int
-pg_popcount64(uint64 word)
-{
-       return pg_popcount64_portable(word);
-}
 
 /*
  * pg_popcount_optimized
index ba57f2cd4bd6295d06fb11cb299aba44163db946..f474ef45510aa5b056a3bd4f6a795638b838849d 100644 (file)
@@ -292,21 +292,11 @@ pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
 #endif                                                 /* ! USE_SVE_POPCNT_WITH_RUNTIME_CHECK */
 
 /*
- * pg_popcount32
+ * pg_popcount64_neon
  *             Return number of 1 bits in word
  */
-int
-pg_popcount32(uint32 word)
-{
-       return pg_popcount64((uint64) word);
-}
-
-/*
- * pg_popcount64
- *             Return number of 1 bits in word
- */
-int
-pg_popcount64(uint64 word)
+static inline int
+pg_popcount64_neon(uint64 word)
 {
        /*
         * For some compilers, __builtin_popcountl() already emits Neon
@@ -383,7 +373,7 @@ pg_popcount_neon(const char *buf, int bytes)
         */
        for (; bytes >= sizeof(uint64); bytes -= sizeof(uint64))
        {
-               popcnt += pg_popcount64(*((const uint64 *) buf));
+               popcnt += pg_popcount64_neon(*((const uint64 *) buf));
                buf += sizeof(uint64);
        }
 
@@ -465,7 +455,7 @@ pg_popcount_masked_neon(const char *buf, int bytes, bits8 mask)
         */
        for (; bytes >= sizeof(uint64); bytes -= sizeof(uint64))
        {
-               popcnt += pg_popcount64(*((const uint64 *) buf) & mask64);
+               popcnt += pg_popcount64_neon(*((const uint64 *) buf) & mask64);
                buf += sizeof(uint64);
        }
 
index 7aebf69898ba21a39448054fc6a852b2f02a06b5..6bce089432f938510e64c662b5f2df1e9fc57480 100644 (file)
@@ -36,8 +36,6 @@
  * operation, but in practice this is close enough, and "sse42" seems easier to
  * follow than "popcnt" for these names.
  */
-static inline int pg_popcount32_sse42(uint32 word);
-static inline int pg_popcount64_sse42(uint64 word);
 static uint64 pg_popcount_sse42(const char *buf, int bytes);
 static uint64 pg_popcount_masked_sse42(const char *buf, int bytes, bits8 mask);
 
@@ -55,12 +53,8 @@ static uint64 pg_popcount_masked_avx512(const char *buf, int bytes, bits8 mask);
  * what the current CPU supports) and then will call the pointer to fulfill the
  * caller's request.
  */
-static int     pg_popcount32_choose(uint32 word);
-static int     pg_popcount64_choose(uint64 word);
 static uint64 pg_popcount_choose(const char *buf, int bytes);
 static uint64 pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask);
-int                    (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
-int                    (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
 uint64         (*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose;
 uint64         (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask) = pg_popcount_masked_choose;
 
@@ -157,7 +151,7 @@ pg_popcount_avx512_available(void)
 #endif                                                 /* USE_AVX512_POPCNT_WITH_RUNTIME_CHECK */
 
 /*
- * These functions get called on the first call to pg_popcount32 etc.
+ * These functions get called on the first call to pg_popcount(), etc.
  * They detect whether we can use the asm implementations, and replace
  * the function pointers so that subsequent calls are routed directly to
  * the chosen implementation.
@@ -167,15 +161,11 @@ choose_popcount_functions(void)
 {
        if (pg_popcount_sse42_available())
        {
-               pg_popcount32 = pg_popcount32_sse42;
-               pg_popcount64 = pg_popcount64_sse42;
                pg_popcount_optimized = pg_popcount_sse42;
                pg_popcount_masked_optimized = pg_popcount_masked_sse42;
        }
        else
        {
-               pg_popcount32 = pg_popcount32_portable;
-               pg_popcount64 = pg_popcount64_portable;
                pg_popcount_optimized = pg_popcount_portable;
                pg_popcount_masked_optimized = pg_popcount_masked_portable;
        }
@@ -189,20 +179,6 @@ choose_popcount_functions(void)
 #endif
 }
 
-static int
-pg_popcount32_choose(uint32 word)
-{
-       choose_popcount_functions();
-       return pg_popcount32(word);
-}
-
-static int
-pg_popcount64_choose(uint64 word)
-{
-       choose_popcount_functions();
-       return pg_popcount64(word);
-}
-
 static uint64
 pg_popcount_choose(const char *buf, int bytes)
 {
@@ -338,23 +314,6 @@ pg_popcount_masked_avx512(const char *buf, int bytes, bits8 mask)
 
 #endif                                                 /* USE_AVX512_POPCNT_WITH_RUNTIME_CHECK */
 
-/*
- * pg_popcount32_sse42
- *             Return the number of 1 bits set in word
- */
-static inline int
-pg_popcount32_sse42(uint32 word)
-{
-#ifdef _MSC_VER
-       return __popcnt(word);
-#else
-       uint32          res;
-
-__asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc");
-       return (int) res;
-#endif
-}
-
 /*
  * pg_popcount64_sse42
  *             Return the number of 1 bits set in word