From: Jakub Jelinek Date: Tue, 8 Oct 2024 08:40:29 +0000 (+0200) Subject: ssa-math-opts, i386: Handle most unordered values rather than just 2 [PR116896] X-Git-Tag: basepoints/gcc-16~5386 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ff889b35935d5e796cf308fb2368d4e319c60ece;p=thirdparty%2Fgcc.git ssa-math-opts, i386: Handle most unordered values rather than just 2 [PR116896] On Mon, Oct 07, 2024 at 10:32:57AM +0200, Richard Biener wrote: > > They are implementation defined, -1, 0, 1, 2 is defined by libstdc++: > > using type = signed char; > > enum class _Ord : type { equivalent = 0, less = -1, greater = 1 }; > > enum class _Ncmp : type { _Unordered = 2 }; > > https://eel.is/c++draft/cmp#categories.pre-1 documents them as > > enum class ord { equal = 0, equivalent = equal, less = -1, greater = 1 }; // exposition only > > enum class ncmp { unordered = -127 }; // exposition only > > and now looking at it, LLVM's libc++ takes that literally and uses > > -1, 0, 1, -127. One can't use <=> operator without including > > which provides the enums, so I think if all we care about is libstdc++, > > then just hardcoding -1, 0, 1, 2 is fine, if we want to also optimize > > libc++ when used with gcc, we could support -1, 0, 1, -127 as another > > option. > > Supporting arbitrary 4 values doesn't make sense, at least on x86 the > > only reason to do the conversion to int in an optab is a good sequence > > to turn the flag comparisons to -1, 0, 1. So, either we do nothing > > more than the patch, or add handle both 2 and -127 for unordered, > > or add support for arbitrary value for the unordered case except > > -1, 0, 1 (then -1 could mean signed int, 1 unsigned int, 0 do the jumps > > and any other value what should be returned for unordered. Here is an incremental patch which adds support for (almost) arbitrary unordered constant value. It changes the .SPACESHIP and spaceship4 optab conventions, so 0 means use branches, floating point, -1, 0, 1, 2 results consumed by tree-ssa-math-opts.cc emitted comparisons, -1 means signed int comparisons, -1, 0, 1 results, 1 means unsigned int comparisons, -1, 0, 1 results, and for constant other than -1, 0, 1 which fit into [-128, 127] converted to the PHI type are otherwise specified as the last argument (then it is -1, 0, 1, C results). 2024-10-08 Jakub Jelinek PR middle-end/116896 * tree-ssa-math-opts.cc (optimize_spaceship): Handle unordered values other than 2, but they still need to be signed char range possibly converted to the PHI result and can't be in [-1, 1] range. Use last .SPACESHIP argument of 1 for unsigned int comparisons, -1 for signed int, 0 for floating point branches and any other for floating point with that value as unordered. * config/i386/i386-expand.cc (ix86_expand_fp_spaceship): Use op2 rather const2_rtx if op2 is not const0_rtx for unordered result. (ix86_expand_int_spaceship): Change INTVAL (op2) == 1 tests to INTVAL (op2) != -1. * doc/md.texi (spaceship@var{m}4): Document the above changes. * gcc.target/i386/pr116896.c: New test. --- diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 81dd5064900..32840113cf6 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -3234,7 +3234,7 @@ ix86_expand_fp_spaceship (rtx dest, rtx op0, rtx op1, rtx op2) if (l2) { emit_label (l2); - emit_move_insn (dest, const2_rtx); + emit_move_insn (dest, op2 == const0_rtx ? const2_rtx : op2); } emit_label (lend); } @@ -3250,11 +3250,11 @@ ix86_expand_int_spaceship (rtx dest, rtx op0, rtx op1, rtx op2) operands nor optimize CC mode - we need a mode usable for both LT and GT resp. LTU and GTU comparisons with the same unswapped operands. */ - rtx flags = gen_rtx_REG (INTVAL (op2) == 1 ? CCGCmode : CCmode, FLAGS_REG); + rtx flags = gen_rtx_REG (INTVAL (op2) != 1 ? CCGCmode : CCmode, FLAGS_REG); rtx tmp = gen_rtx_COMPARE (GET_MODE (flags), op0, op1); emit_insn (gen_rtx_SET (flags, tmp)); rtx lt_tmp = gen_reg_rtx (QImode); - ix86_expand_setcc (lt_tmp, INTVAL (op2) == 1 ? LT : LTU, flags, + ix86_expand_setcc (lt_tmp, INTVAL (op2) != 1 ? LT : LTU, flags, const0_rtx); if (GET_MODE (dest) != QImode) { @@ -3264,7 +3264,7 @@ ix86_expand_int_spaceship (rtx dest, rtx op0, rtx op1, rtx op2) lt_tmp = tmp; } rtx gt_tmp = gen_reg_rtx (QImode); - ix86_expand_setcc (gt_tmp, INTVAL (op2) == 1 ? GT : GTU, flags, + ix86_expand_setcc (gt_tmp, INTVAL (op2) != 1 ? GT : GTU, flags, const0_rtx); if (GET_MODE (dest) != QImode) { diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index c58072ea76c..603f74a78c0 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -8575,8 +8575,11 @@ if operand 1 with mode @var{m} compares less than operand 2, equal to operand 2, greater than operand 2 or is unordered with operand 2. Operand 3 should be @code{const0_rtx} if the result is used in comparisons, @code{const1_rtx} if the result is used as integer value and the comparison -is signed, @code{const2_rtx} if the result is used as integer value and -the comparison is unsigned. +is integral unsigned, @code{constm1_rtx} if the result is used as integer +value and the comparison is integral signed and some other @code{CONST_INT} +if the result is used as integer value and the comparison is floating point. +In the last case, instead of setting output operand 0 to 2 for unordered, +set it to operand 3. @var{m} should be a scalar floating point mode. This pattern is not allowed to @code{FAIL}. diff --git a/gcc/testsuite/gcc.target/i386/pr116896.c b/gcc/testsuite/gcc.target/i386/pr116896.c new file mode 100644 index 00000000000..9d1bd882770 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr116896.c @@ -0,0 +1,59 @@ +/* PR middle-end/116896 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att -fno-stack-protector" } */ +/* { dg-final { scan-assembler-times "\tjp\t" 2 } } */ +/* { dg-final { scan-assembler-not "\tj\[^mp\]\[a-z\]*\t" } } */ +/* { dg-final { scan-assembler-times "\tsbb\[bl\]\t\\\$0, " 4 } } */ +/* { dg-final { scan-assembler-times "\tseta\t" 4 } } */ + +signed char +foo (float x, float y) +{ + if (x == y) + return 0; + else if (x < y) + return -1; + else if (x > y) + return 1; + else + return 2; +} + +__attribute__((optimize ("fast-math"))) signed char +bar (float x, float y) +{ + if (x == y) + return 0; + else if (x < y) + return -1; + else if (x > y) + return 1; + else + return 2; +} + +signed char +baz (float x, float y) +{ + if (x == y) + return 0; + else if (x < y) + return -1; + else if (x > y) + return 1; + else + return -127; +} + +__attribute__((optimize ("fast-math"))) signed char +qux (float x, float y) +{ + if (x == y) + return 0; + else if (x < y) + return -1; + else if (x > y) + return 1; + else + return -127; +} diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index 37a3faac731..4584d8c3039 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -6023,11 +6023,12 @@ optimize_spaceship (gcond *stmt) /* Check if there is a single bb into which all failed conditions jump to (perhaps through an empty block) and if it results in - a single integral PHI which just sets it to -1, 0, 1, 2 + a single integral PHI which just sets it to -1, 0, 1, X (or -1, 0, 1 when NaNs can't happen). In that case use 1 rather than 0 as last .SPACESHIP argument to tell backends it might consider different code generation and just cast the result - of .SPACESHIP to the PHI result. */ + of .SPACESHIP to the PHI result. X above is some value + other than -1, 0, 1, for libstdc++ 2, for libc++ -127. */ tree arg3 = integer_zero_node; edge e = EDGE_SUCC (bb0, 0); if (e->dest == bb1) @@ -6054,6 +6055,7 @@ optimize_spaceship (gcond *stmt) && integer_zerop (gimple_phi_arg_def_from_edge (phi, e)) && EDGE_COUNT (bbp->preds) == (HONOR_NANS (TREE_TYPE (arg1)) ? 4 : 3)) { + HOST_WIDE_INT argval = SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) ? 2 : -1; for (unsigned i = 0; phi && i < EDGE_COUNT (bbp->preds) - 1; ++i) { edge e3 = i == 0 ? e1 : i == 1 ? em1 : e2; @@ -6071,16 +6073,29 @@ optimize_spaceship (gcond *stmt) tree a = gimple_phi_arg_def_from_edge (phi, e3); if (TREE_CODE (a) != INTEGER_CST || (i == 0 && !integer_onep (a)) - || (i == 1 && !integer_all_onesp (a)) - || (i == 2 && wi::to_widest (a) != 2)) + || (i == 1 && !integer_all_onesp (a))) { phi = NULL; break; } + if (i == 2) + { + tree minv = TYPE_MIN_VALUE (signed_char_type_node); + tree maxv = TYPE_MAX_VALUE (signed_char_type_node); + widest_int w = widest_int::from (wi::to_wide (a), SIGNED); + if ((w >= -1 && w <= 1) + || w < wi::to_widest (minv) + || w > wi::to_widest (maxv)) + { + phi = NULL; + break; + } + argval = w.to_shwi (); + } } if (phi) arg3 = build_int_cst (integer_type_node, - TYPE_UNSIGNED (TREE_TYPE (arg1)) ? 2 : 1); + TYPE_UNSIGNED (TREE_TYPE (arg1)) ? 1 : argval); } /* For integral <=> comparisons only use .SPACESHIP if it is turned @@ -6094,11 +6109,18 @@ optimize_spaceship (gcond *stmt) gimple_stmt_iterator gsi = gsi_for_stmt (stmt); gsi_insert_before (&gsi, gc, GSI_SAME_STMT); - wide_int wm1 = wi::minus_one (TYPE_PRECISION (integer_type_node)); - wide_int w2 = (HONOR_NANS (TREE_TYPE (arg1)) - ? wi::two (TYPE_PRECISION (integer_type_node)) - : wi::one (TYPE_PRECISION (integer_type_node))); - int_range<1> vr (TREE_TYPE (lhs), wm1, w2); + wide_int wmin = wi::minus_one (TYPE_PRECISION (integer_type_node)); + wide_int wmax = wi::one (TYPE_PRECISION (integer_type_node)); + if (HONOR_NANS (TREE_TYPE (arg1))) + { + if (arg3 == integer_zero_node) + wmax = wi::two (TYPE_PRECISION (integer_type_node)); + else if (tree_int_cst_sgn (arg3) < 0) + wmin = wi::to_wide (arg3); + else + wmax = wi::to_wide (arg3); + } + int_range<1> vr (TREE_TYPE (lhs), wmin, wmax); set_range_info (lhs, vr); if (arg3 != integer_zero_node)