From: Daniel Barboza Date: Wed, 18 Feb 2026 13:29:50 +0000 (-0300) Subject: match.pd: remove bit set/bit clear branch mispredict [PR64567] X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b4162b53545220b2ae19993e2edf4707bab4a3db;p=thirdparty%2Fgcc.git match.pd: remove bit set/bit clear branch mispredict [PR64567] Add two patterns to eliminate mispredicts in the following bit ops scenarios: - checking if a single bit is not set, and in this case set it: always set the bit; - checking if a bitmask is set (even partially), and in this case clear it: always clear the bitmask. Bootstrapped and tested with x86_64-pc-linux-gnu. PR tree-optimization/64567 gcc/ChangeLog: * match.pd (`cond (bit_and A IMM) (bit_or A IMM) A`): New pattern. (`cond (bit_and A IMM) (bit_and A ~IMM) A`): New pattern. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/pr64567-2.c: New test. * gcc.dg/tree-ssa/pr64567.c: New test. --- diff --git a/gcc/match.pd b/gcc/match.pd index d0f913e640e..94f69185adf 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -6322,6 +6322,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && !expand_vec_cond_expr_p (TREE_TYPE (@1), TREE_TYPE (@0))))) (vec_cond @0 (op!:type @3 @1) (op!:type @3 @2))))) +/* If we have an "if a bit is not set, set it" case, + just set the bit all the time (PR 64567). Note that + this does not work if we're checking for more than one + bit, e.g. ((a & 5) == 0 ? a | 5 : a) will fail for a = 1 (we + would return 5 instead of 1). */ +(simplify + (cond (eq (bit_and @0 INTEGER_CST@1) integer_zerop) + (bit_ior@2 @0 INTEGER_CST@1) @0) + (if (wi::popcount (wi::to_wide (@1)) == 1) + @2)) + +/* A clear bit version of the above: "if a bitmask is + set, clear it". In this case always clear the bitmask + (see PR 64567). 
*/ +(simplify + (cond (ne (bit_and @0 INTEGER_CST@1) integer_zerop) + (bit_and@3 @0 INTEGER_CST@2) @0) + (if (wi::to_wide (@2) == ~wi::to_wide (@1)) + @3)) + #if GIMPLE (match (nop_atomic_bit_test_and_p @0 @1 @4) (bit_and (convert?@4 (ATOMIC_FETCH_OR_XOR_N @2 INTEGER_CST@0 @3)) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr64567-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr64567-2.c new file mode 100644 index 00000000000..5b523d2cfe7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr64567-2.c @@ -0,0 +1,52 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +void abort(void); + +/* Macro adapted from builtin-object-size-common.h */ +#define FAIL() \ + do { \ + __builtin_printf ("Failure at line: %d\n", __LINE__); \ + abort(); \ + } while (0) + + +__attribute__((noinline)) +unsigned mask_zero_set (unsigned val, unsigned mask) +{ + if ((val & mask) == 0) + val |= mask; + + return val; +} + +__attribute__((noinline)) +unsigned mask_notzero_clear (unsigned val, unsigned mask) +{ + if ((val & mask) != 0) + val &= ~mask; + + return val; +} + +int main (void) { + if (mask_zero_set (0, 1) != 1) + FAIL (); + + if (mask_zero_set (4, 3) != 7) + FAIL (); + + if (mask_zero_set (1, 5) != 1) + FAIL (); + + if (mask_notzero_clear (7, 1) != 6) + FAIL (); + + if (mask_notzero_clear (7, 3) != 4) + FAIL (); + + if (mask_notzero_clear (8, 6) != 8) + FAIL (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr64567.c b/gcc/testsuite/gcc.dg/tree-ssa/pr64567.c new file mode 100644 index 00000000000..51af71a3938 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr64567.c @@ -0,0 +1,40 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +#define F1 0x01 +#define F2 0x02 + +#define DECLS(n,VOL) \ +__attribute__((noinline,noclone)) \ +unsigned foo##n(unsigned A) \ +{ \ + VOL unsigned flags = A; \ + if (flags & (F1 | F2)) \ + flags &= ~(F1 | F2); \ + return flags; \ +} \ +__attribute__((noinline,noclone)) \ +unsigned bar##n(unsigned A) \ +{ \ + VOL unsigned flags 
= A; \ + if (!(flags & F1)) \ + flags |= F1; \ + return flags; \ +} \ + +DECLS(0,) +DECLS(1,volatile) + +int main () +{ + for(int A = 0; A < 4; A++) + { + if (foo0 (A) != foo1 (A)) __builtin_abort(); + if (bar0 (A) != bar1 (A)) __builtin_abort(); + } +} + +/* foo1 and bar1 will add 2 gotos each since they are not + being optimized, 'main' will add +6 (2 gotos for the loop, + 2 gotos for each abort check). */ +/* { dg-final { scan-tree-dump-times " goto " 10 optimized } } */