]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Add __builtin_bitreverse{8,16,32,64} builtins [PR50481]
authorJakub Jelinek <jakub@redhat.com>
Fri, 15 May 2026 07:50:52 +0000 (09:50 +0200)
committerJakub Jelinek <jakub@gcc.gnu.org>
Fri, 15 May 2026 07:50:52 +0000 (09:50 +0200)
Future work could optimize this on specific targets:
- ARM: lower to RBIT
- x86 with GFNI: lower to vgf2p8affineqb
  https://wunkolo.github.io/post/2020/11/gf2p8affineqb-bit-reversal/

2026-05-15  Disservin  <disservin.social@gmail.com>
    Jakub Jelinek  <jakub@redhat.com>

PR target/50481
* builtin-types.def (BT_FN_UINT8_UINT8): New.
* builtins.def (BUILT_IN_BITREVERSE8, BUILT_IN_BITREVERSE16,
BUILT_IN_BITREVERSE32, BUILT_IN_BITREVERSE64): New builtins.
* builtins.cc (expand_builtin, is_inexpensive_builtin): Handle
bitreverse builtins.
* fold-const-call.cc (fold_const_call_ss): Fold bitreverse builtins.
* fold-const.cc (tree_call_nonnegative_warnv_p): Handle
bitreverse builtins.
* optabs.def (bitreverse_optab): New.
* optabs.cc (expand_bitreverse): New function.
(expand_unop): Use it for bitreverse_optab.
* tree-ssa-ccp.cc (evaluate_stmt): Handle bitreverse builtins.
* tree-ssa-phiopt.cc (empty_bb_or_one_feeding_into_p,
cond_removal_in_builtin_zero_pattern): Likewise.
* doc/extend.texi: Document __builtin_bitreverse{8,16,32,64}.
* doc/md.texi (bitreverse<mode>2): Document.

* gcc.dg/builtin-bitreverse-1.c: New test.
* gcc.dg/builtin-bitreverse-2.c: New test.

Signed-off-by: Disservin <disservin.social@gmail.com>
13 files changed:
gcc/builtin-types.def
gcc/builtins.cc
gcc/builtins.def
gcc/doc/extend.texi
gcc/doc/md.texi
gcc/fold-const-call.cc
gcc/fold-const.cc
gcc/optabs.cc
gcc/optabs.def
gcc/testsuite/gcc.dg/builtin-bitreverse-1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/builtin-bitreverse-2.c [new file with mode: 0644]
gcc/tree-ssa-ccp.cc
gcc/tree-ssa-phiopt.cc

index ab0cbc65cfcf9dec891b50dd57cb41c1e8d9422f..365badca2aa96b7f4736991a92fcb4b642d4ccc7 100644 (file)
@@ -412,6 +412,7 @@ DEF_FUNCTION_TYPE_1 (BT_FN_INT16_FLOAT, BT_INT16, BT_FLOAT)
 DEF_FUNCTION_TYPE_1 (BT_FN_UINT32_FLOAT, BT_UINT32, BT_FLOAT)
 DEF_FUNCTION_TYPE_1 (BT_FN_UINT16_FLOAT, BT_UINT16, BT_FLOAT)
 DEF_FUNCTION_TYPE_1 (BT_FN_UINT8_FLOAT, BT_UINT8, BT_FLOAT)
+DEF_FUNCTION_TYPE_1 (BT_FN_UINT8_UINT8, BT_UINT8, BT_UINT8)
 DEF_FUNCTION_TYPE_1 (BT_FN_UINT16_UINT16, BT_UINT16, BT_UINT16)
 DEF_FUNCTION_TYPE_1 (BT_FN_UINT32_UINT32, BT_UINT32, BT_UINT32)
 DEF_FUNCTION_TYPE_1 (BT_FN_UINT64_UINT64, BT_UINT64, BT_UINT64)
index 692e20088c28983f9f7bfadf706ea19c703afd95..0475bb88a6179783b63d4211052d4c2b100c6096 100644 (file)
@@ -8184,6 +8184,16 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
        return target;
       break;
 
+    case BUILT_IN_BITREVERSE8:
+    case BUILT_IN_BITREVERSE16:
+    case BUILT_IN_BITREVERSE32:
+    case BUILT_IN_BITREVERSE64:
+      target = expand_builtin_unop (target_mode, exp, target, subtarget,
+                                   bitreverse_optab);
+      if (target)
+       return target;
+      break;
+
     CASE_INT_FN (BUILT_IN_FFS):
       target = expand_builtin_unop (target_mode, exp, target,
                                    subtarget, ffs_optab);
@@ -12343,6 +12353,10 @@ is_inexpensive_builtin (tree decl)
       case BUILT_IN_BSWAP32:
       case BUILT_IN_BSWAP64:
       case BUILT_IN_BSWAP128:
+      case BUILT_IN_BITREVERSE8:
+      case BUILT_IN_BITREVERSE16:
+      case BUILT_IN_BITREVERSE32:
+      case BUILT_IN_BITREVERSE64:
       case BUILT_IN_CLZ:
       case BUILT_IN_CLZIMAX:
       case BUILT_IN_CLZL:
index 8ab0599b17f3b9096e787f20dc4cf50246751a0c..68838de55f1340c8b316dfc1169f9081af6a3bc2 100644 (file)
@@ -1024,6 +1024,10 @@ DEF_GCC_BUILTIN        (BUILT_IN_BSWAP16, "bswap16", BT_FN_UINT16_UINT16, ATTR_C
 DEF_GCC_BUILTIN        (BUILT_IN_BSWAP32, "bswap32", BT_FN_UINT32_UINT32, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_BSWAP64, "bswap64", BT_FN_UINT64_UINT64, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_BSWAP128, "bswap128", BT_FN_UINT128_UINT128, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN        (BUILT_IN_BITREVERSE8, "bitreverse8", BT_FN_UINT8_UINT8, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN        (BUILT_IN_BITREVERSE16, "bitreverse16", BT_FN_UINT16_UINT16, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN        (BUILT_IN_BITREVERSE32, "bitreverse32", BT_FN_UINT32_UINT32, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN        (BUILT_IN_BITREVERSE64, "bitreverse64", BT_FN_UINT64_UINT64, ATTR_CONST_NOTHROW_LEAF_LIST)
 
 DEF_EXT_LIB_BUILTIN    (BUILT_IN_CLEAR_CACHE, "__clear_cache", BT_FN_VOID_PTR_PTR, ATTR_NOTHROW_LEAF_LIST)
 /* [trans-mem]: Adjust BUILT_IN_TM_CALLOC if BUILT_IN_CALLOC is changed.  */
index 42f83b98a05bcb16d19080fe856fab271d0d09c7..96ed8b45f04f9f9d10af4af481c8bebf2cbd23bf 100644 (file)
@@ -16022,6 +16022,25 @@ Similar to @code{__builtin_bswap64}, except the argument and return types
 are 128-bit.  Only supported on targets when 128-bit types are supported.
 @enddefbuiltin
 
+@defbuiltin{uint8_t __builtin_bitreverse8 (uint8_t @var{x})}
+Returns @var{x} with all bits reversed.
+@enddefbuiltin
+
+@defbuiltin{uint16_t __builtin_bitreverse16 (uint16_t @var{x})}
+Similar to @code{__builtin_bitreverse8}, except the argument and return types
+are 16-bit.
+@enddefbuiltin
+
+@defbuiltin{uint32_t __builtin_bitreverse32 (uint32_t @var{x})}
+Similar to @code{__builtin_bitreverse8}, except the argument and return types
+are 32-bit.
+@enddefbuiltin
+
+@defbuiltin{uint64_t __builtin_bitreverse64 (uint64_t @var{x})}
+Similar to @code{__builtin_bitreverse8}, except the argument and return types
+are 64-bit.
+@enddefbuiltin
+
 @node CRC Builtins
 @subsection CRC Builtins
 @cindex CRC builtins
index c4fc53458d9daad8aa6a3acb0de5566326d0c8f9..94d258c3c91940f41deca7a49786ddbcde728c2f 100644 (file)
@@ -5273,6 +5273,10 @@ op0 = (narrow) (((wide) op1 + (wide) op2 + 1) >> 1);
 @item @samp{bswap@var{m}2}
 Reverse the order of bytes of operand 1 and store the result in operand 0.
 
+@mdindex bitreverse@var{m}2
+@item @samp{bitreverse@var{m}2}
+Reverse the order of bits of operand 1 and store the result in operand 0.
+
 @mdindex neg@var{m}2
 @mdindex ssneg@var{m}2
 @mdindex usneg@var{m}2
index cc2defcd5bc9385cd536f294ab2d76ce8b7b7326..d473b0dd42800715328e3885072e7e9c593c5141 100644 (file)
@@ -1102,6 +1102,14 @@ fold_const_call_ss (wide_int *result, combined_fn fn, const wide_int_ref &arg,
                                           TYPE_SIGN (arg_type)));
       return true;
 
+    case CFN_BUILT_IN_BITREVERSE8:
+    case CFN_BUILT_IN_BITREVERSE16:
+    case CFN_BUILT_IN_BITREVERSE32:
+    case CFN_BUILT_IN_BITREVERSE64:
+      *result = wi::bitreverse (wide_int::from (arg, precision,
+                                               TYPE_SIGN (arg_type)));
+      return true;
+
     default:
       return false;
     }
index 56503e570bc440b65366b0c83a958a14dd3d5712..3bc82a108dbc066fbc8a52bce40821db75b17201 100644 (file)
@@ -14984,6 +14984,10 @@ tree_call_nonnegative_warnv_p (tree type, combined_fn fn, tree arg0, tree arg1,
     case CFN_BUILT_IN_BSWAP32:
     case CFN_BUILT_IN_BSWAP64:
     case CFN_BUILT_IN_BSWAP128:
+    case CFN_BUILT_IN_BITREVERSE8:
+    case CFN_BUILT_IN_BITREVERSE16:
+    case CFN_BUILT_IN_BITREVERSE32:
+    case CFN_BUILT_IN_BITREVERSE64:
       /* Always true.  */
       return true;
 
index d17fbcf12ed645cefd3d89dd2822b0c544200d24..87c4ea682fc502dedd1d08ab63fa83c767495434 100644 (file)
@@ -2959,6 +2959,132 @@ expand_doubleword_bswap (machine_mode mode, rtx op, rtx target)
   return target;
 }
 
+/* Try calculating (bitreverse x) using masks and shifts.  */
+
+static rtx
+expand_bitreverse (scalar_int_mode mode, rtx op0, rtx target)
+{
+  unsigned int precision = GET_MODE_BITSIZE (mode);
+  rtx_insn *last;
+
+  /* Operation requires at least 4 bits (one nibble swap makes no sense below
+     that).  */
+  if (precision < 4)
+    return NULL_RTX;
+
+  if (target == NULL_RTX
+      || target == op0
+      || reg_overlap_mentioned_p (target, op0))
+    target = gen_reg_rtx (mode);
+
+  last = get_last_insn ();
+
+  rtx x, lo, hi;
+
+  /* Step 1: byte-swap (only meaningful for >= 16 bits).  */
+  if (precision >= 16)
+    {
+      x = expand_unop (mode, bswap_optab, op0, NULL_RTX, true);
+      if (x == NULL_RTX)
+       goto fail;
+    }
+  else
+    x = op0;
+
+  /* Step 2: swap nibbles within each byte (shift=4, only for >= 8 bits).  */
+  if (precision >= 8)
+    {
+      wide_int mask = wi::zero (precision);
+      for (unsigned int start = 0; start < precision; start += 8)
+       mask = wi::bit_or (mask, wi::shifted_mask (start, 4, false,
+                                                  precision));
+
+      rtx mask_rtx = immed_wide_int_const (mask, mode);
+
+      hi = expand_simple_binop (mode, LSHIFTRT, x, GEN_INT (4),
+                               NULL_RTX, true, OPTAB_LIB_WIDEN);
+      if (hi == NULL_RTX) goto fail;
+      hi = expand_binop (mode, and_optab, hi, mask_rtx,
+                        NULL_RTX, true, OPTAB_LIB_WIDEN);
+      if (hi == NULL_RTX) goto fail;
+
+      lo = expand_binop (mode, and_optab, x, mask_rtx,
+                        NULL_RTX, true, OPTAB_LIB_WIDEN);
+      if (lo == NULL_RTX) goto fail;
+      lo = expand_simple_binop (mode, ASHIFT, lo, GEN_INT (4),
+                               NULL_RTX, true, OPTAB_LIB_WIDEN);
+      if (lo == NULL_RTX) goto fail;
+
+      x = expand_binop (mode, ior_optab, hi, lo,
+                       NULL_RTX, true, OPTAB_LIB_WIDEN);
+      if (x == NULL_RTX) goto fail;
+    }
+
+  /* Step 3: swap pairs of bits within each nibble (shift=2).  */
+  {
+    wide_int mask = wi::zero (precision);
+    for (unsigned int start = 0; start < precision; start += 4)
+      mask = wi::bit_or (mask, wi::shifted_mask (start, 2, false, precision));
+
+    rtx mask_rtx = immed_wide_int_const (mask, mode);
+
+    hi = expand_simple_binop (mode, LSHIFTRT, x, GEN_INT (2),
+                             NULL_RTX, true, OPTAB_LIB_WIDEN);
+    if (hi == NULL_RTX) goto fail;
+    hi = expand_binop (mode, and_optab, hi, mask_rtx,
+                      NULL_RTX, true, OPTAB_LIB_WIDEN);
+    if (hi == NULL_RTX) goto fail;
+
+    lo = expand_binop (mode, and_optab, x, mask_rtx,
+                      NULL_RTX, true, OPTAB_LIB_WIDEN);
+    if (lo == NULL_RTX) goto fail;
+    lo = expand_simple_binop (mode, ASHIFT, lo, GEN_INT (2),
+                             NULL_RTX, true, OPTAB_LIB_WIDEN);
+    if (lo == NULL_RTX) goto fail;
+
+    x = expand_binop (mode, ior_optab, hi, lo,
+                     NULL_RTX, true, OPTAB_LIB_WIDEN);
+    if (x == NULL_RTX) goto fail;
+  }
+
+  /* Step 4: swap adjacent bits (shift=1).  */
+  {
+    wide_int mask = wi::zero (precision);
+    for (unsigned int start = 0; start < precision; start += 2)
+      mask = wi::bit_or (mask, wi::shifted_mask (start, 1, false,
+                                                precision));
+
+    rtx mask_rtx = immed_wide_int_const (mask, mode);
+
+    hi = expand_simple_binop (mode, LSHIFTRT, x, GEN_INT (1),
+                             NULL_RTX, true, OPTAB_LIB_WIDEN);
+    if (hi == NULL_RTX) goto fail;
+    hi = expand_binop (mode, and_optab, hi, mask_rtx,
+                      NULL_RTX, true, OPTAB_LIB_WIDEN);
+    if (hi == NULL_RTX) goto fail;
+
+    lo = expand_binop (mode, and_optab, x, mask_rtx,
+                      NULL_RTX, true, OPTAB_LIB_WIDEN);
+    if (lo == NULL_RTX) goto fail;
+    lo = expand_simple_binop (mode, ASHIFT, lo, GEN_INT (1),
+                             NULL_RTX, true, OPTAB_LIB_WIDEN);
+    if (lo == NULL_RTX) goto fail;
+
+    x = expand_binop (mode, ior_optab, hi, lo,
+                     target, true, OPTAB_LIB_WIDEN);
+    if (x == NULL_RTX) goto fail;
+  }
+
+  if (x != target)
+    emit_move_insn (target, x);
+
+  return target;
+
+ fail:
+  delete_insns_since (last);
+  return NULL_RTX;
+}
+
 /* Try calculating (parity x) as (and (popcount x) 1), where
    popcount can also be done in a wider mode.  */
 static rtx
@@ -3409,6 +3535,10 @@ expand_unop (machine_mode mode, optab unoptab, rtx op0, rtx target,
       goto try_libcall;
     }
 
+  if (unoptab == bitreverse_optab && is_a <scalar_int_mode> (mode, &int_mode))
+    if (rtx tem = expand_bitreverse (int_mode, op0, target))
+      return tem;
+
   /* Neg should be tried via expand_absneg_bit before widening.  */
   if (optab_to_code (unoptab) == NEG)
     {
index 193f42a728a2d414f078bce785f760cca00087ca..7ccea18543f15c45d16cc5ce7bbaa54399005567 100644 (file)
@@ -184,6 +184,7 @@ OPTAB_VC(absv_optab, "absv$I$a2", ABS)
 OPTAB_VX(absv_optab, "abs$F$a2")
 OPTAB_NL(one_cmpl_optab, "one_cmpl$a2", NOT, "one_cmpl", '2', gen_int_libfunc)
 OPTAB_NC(bswap_optab, "bswap$a2", BSWAP)
+OPTAB_NC(bitreverse_optab, "bitreverse$a2", BITREVERSE)
 OPTAB_NL(ffs_optab, "ffs$a2", FFS, "ffs", '2', gen_int_libfunc)
 OPTAB_NL(clz_optab, "clz$a2", CLZ, "clz", '2', gen_int_libfunc)
 OPTAB_NL(ctz_optab, "ctz$a2", CTZ, "ctz", '2', gen_int_libfunc)
diff --git a/gcc/testsuite/gcc.dg/builtin-bitreverse-1.c b/gcc/testsuite/gcc.dg/builtin-bitreverse-1.c
new file mode 100644 (file)
index 0000000..0047e8d
--- /dev/null
@@ -0,0 +1,71 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+[[gnu::noipa]] static unsigned char
+br8 (unsigned char x)
+{
+  return __builtin_bitreverse8 (x);
+}
+
+[[gnu::noipa]] static unsigned short
+br16 (unsigned short x)
+{
+  return __builtin_bitreverse16 (x);
+}
+
+[[gnu::noipa]] static unsigned
+br32 (unsigned x)
+{
+  return __builtin_bitreverse32 (x);
+}
+
+[[gnu::noipa]] static unsigned long long
+br64 (unsigned long long x)
+{
+  return __builtin_bitreverse64 (x);
+}
+
+int
+main ()
+{
+#if __CHAR_BIT__ == 8
+  if (br8 (0x00u) != 0x00u)
+    __builtin_abort ();
+  if (br8 (0x01u) != 0x80u)
+    __builtin_abort ();
+  if (br8 (0x34u) != 0x2cu)
+    __builtin_abort ();
+  if (br8 (0xffu) != 0xffu)
+    __builtin_abort ();
+#if __SIZEOF_SHORT__ == 2
+  if (br16 (0x0000u) != 0x0000u)
+    __builtin_abort ();
+  if (br16 (0x0001u) != 0x8000u)
+    __builtin_abort ();
+  if (br16 (0x1234u) != 0x2c48u)
+    __builtin_abort ();
+  if (br16 (0xffffu) != 0xffffu)
+    __builtin_abort ();
+#endif
+#if __SIZEOF_INT__ == 4
+  if (br32 (0x00000000u) != 0x00000000u)
+    __builtin_abort ();
+  if (br32 (0x00000001u) != 0x80000000u)
+    __builtin_abort ();
+  if (br32 (0x01234567u) != 0xe6a2c480u)
+    __builtin_abort ();
+  if (br32 (0xffffffffu) != 0xffffffffu)
+    __builtin_abort ();
+#endif
+#if __SIZEOF_LONG_LONG__ == 8
+  if (br64 (0x0000000000000000ull) != 0x0000000000000000ull)
+    __builtin_abort ();
+  if (br64 (0x0000000000000001ull) != 0x8000000000000000ull)
+    __builtin_abort ();
+  if (br64 (0x0123456789abcdefull) != 0xf7b3d591e6a2c480ull)
+    __builtin_abort ();
+  if (br64 (0xffffffffffffffffull) != 0xffffffffffffffffull)
+    __builtin_abort ();
+#endif
+#endif
+}
diff --git a/gcc/testsuite/gcc.dg/builtin-bitreverse-2.c b/gcc/testsuite/gcc.dg/builtin-bitreverse-2.c
new file mode 100644 (file)
index 0000000..42a677c
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-std=gnu11" } */
+
+#if __CHAR_BIT__ == 8
+_Static_assert (__builtin_bitreverse8 (0x01u) == 0x80u, "bitreverse8");
+#if __SIZEOF_SHORT__ == 2
+_Static_assert (__builtin_bitreverse16 (0x0001u) == 0x8000u, "bitreverse16");
+#endif
+#if __SIZEOF_INT__ == 4
+_Static_assert (__builtin_bitreverse32 (0x12345678u) == 0x1e6a2c48u,
+               "bitreverse32");
+#endif
+#if __SIZEOF_LONG_LONG__ == 8
+_Static_assert (__builtin_bitreverse64 (0x0123456789abcdefull)
+                == 0xf7b3d591e6a2c480ull, "bitreverse64");
+#endif
+#endif
index 952ebf17e7acd4c7dfbfe7f265e6e762daf499d7..974bef17832f01fb7534f70a61f34c715558d339 100644 (file)
@@ -2435,6 +2435,10 @@ evaluate_stmt (gimple *stmt)
            case BUILT_IN_BSWAP32:
            case BUILT_IN_BSWAP64:
            case BUILT_IN_BSWAP128:
+           case BUILT_IN_BITREVERSE8:
+           case BUILT_IN_BITREVERSE16:
+           case BUILT_IN_BITREVERSE32:
+           case BUILT_IN_BITREVERSE64:
              val = get_value_for_expr (gimple_call_arg (stmt, 0), true);
              if (val.lattice_val == UNDEFINED)
                break;
index 324559e6a7deee1862225bbed369743215dc53b6..d9e1edb9b144542245ff1514aa866e7afa15c71c 100644 (file)
@@ -819,6 +819,10 @@ empty_bb_or_one_feeding_into_p (basic_block bb,
        case CFN_BUILT_IN_BSWAP32:
        case CFN_BUILT_IN_BSWAP64:
        case CFN_BUILT_IN_BSWAP128:
+       case CFN_BUILT_IN_BITREVERSE8:
+       case CFN_BUILT_IN_BITREVERSE16:
+       case CFN_BUILT_IN_BITREVERSE32:
+       case CFN_BUILT_IN_BITREVERSE64:
        CASE_CFN_FFS:
        CASE_CFN_PARITY:
        CASE_CFN_POPCOUNT:
@@ -2578,6 +2582,10 @@ cond_removal_in_builtin_zero_pattern (basic_block cond_bb,
     case CFN_BUILT_IN_BSWAP32:
     case CFN_BUILT_IN_BSWAP64:
     case CFN_BUILT_IN_BSWAP128:
+    case CFN_BUILT_IN_BITREVERSE8:
+    case CFN_BUILT_IN_BITREVERSE16:
+    case CFN_BUILT_IN_BITREVERSE32:
+    case CFN_BUILT_IN_BITREVERSE64:
     CASE_CFN_FFS:
     CASE_CFN_PARITY:
     CASE_CFN_POPCOUNT: