]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Fold truncations of left shifts in match.pd
authorRoger Sayle <roger@nextmovesoftware.com>
Wed, 15 Jun 2022 07:31:13 +0000 (09:31 +0200)
committerRoger Sayle <roger@nextmovesoftware.com>
Wed, 15 Jun 2022 07:31:13 +0000 (09:31 +0200)
Whilst investigating PR 55278, I noticed that the tree-ssa optimizers
aren't eliminating the promotions of shifts to "int" as inserted by the
c-family front-ends, instead leaving this simplification to be left to
the RTL optimizers.  This patch allows match.pd to do this itself earlier,
narrowing (T)(X << C) to (T)X << C when the constant C is known to be
valid for the (narrower) type T.

Hence for this simple test case:
short foo(short x) { return x << 5; }

the .optimized dump currently looks like:

short int foo (short int x)
{
  int _1;
  int _2;
  short int _4;

  <bb 2> [local count: 1073741824]:
  _1 = (int) x_3(D);
  _2 = _1 << 5;
  _4 = (short int) _2;
  return _4;
}

but with this patch, now becomes:

short int foo (short int x)
{
  short int _2;

  <bb 2> [local count: 1073741824]:
  _2 = x_1(D) << 5;
  return _2;
}

This is always reasonable as RTL expansion knows how to use
widening optabs if it makes sense at the RTL level to perform
this shift in a wider mode.

Of course, there's often a catch.  The above simplification not only
reduces the number of statements in gimple, but also allows further
optimizations, for example including the perception of rotate idioms
and bswap16.  Alas, optimizing things earlier than anticipated
requires several testsuite changes [though all these tests have
been confirmed to generate identical assembly code on x86_64].
The only significant change is that the vectorization pass wouldn't
previously lower rotations of signed integer types.  Hence this
patch includes a refinement to tree-vect-patterns to allow signed
types, by using the equivalent unsigned shifts.

2022-06-15  Roger Sayle  <roger@nextmovesoftware.com>
    Richard Biener  <rguenther@suse.de>

gcc/ChangeLog
* match.pd (convert (lshift @1 INTEGER_CST@2)): Narrow integer
left shifts by a constant when the result is truncated, and the
shift constant is well-defined.
* tree-vect-patterns.cc (vect_recog_rotate_pattern): Add
support for rotations of signed integer types, by lowering
using unsigned vector shifts.

gcc/testsuite/ChangeLog
* gcc.dg/fold-convlshift-4.c: New test case.
* gcc.dg/optimize-bswaphi-1.c: Update found bswap count.
* gcc.dg/tree-ssa/pr61839_3.c: Shift is now optimized before VRP.
* gcc.dg/vect/vect-over-widen-1-big-array.c: Remove obsolete tests.
* gcc.dg/vect/vect-over-widen-1.c: Likewise.
* gcc.dg/vect/vect-over-widen-3-big-array.c: Likewise.
* gcc.dg/vect/vect-over-widen-3.c: Likewise.
* gcc.dg/vect/vect-over-widen-4-big-array.c: Likewise.
* gcc.dg/vect/vect-over-widen-4.c: Likewise.

gcc/match.pd
gcc/testsuite/gcc.dg/fold-convlshift-4.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/optimize-bswaphi-1.c
gcc/testsuite/gcc.dg/tree-ssa/pr61839_3.c
gcc/testsuite/gcc.dg/vect/vect-over-widen-1-big-array.c
gcc/testsuite/gcc.dg/vect/vect-over-widen-1.c
gcc/testsuite/gcc.dg/vect/vect-over-widen-3-big-array.c
gcc/testsuite/gcc.dg/vect/vect-over-widen-3.c
gcc/testsuite/gcc.dg/vect/vect-over-widen-4-big-array.c
gcc/testsuite/gcc.dg/vect/vect-over-widen-4.c
gcc/tree-vect-patterns.cc

index 776c9c6489af75d21acbe016c1c08818a6cd5dc7..d4058d619799cddd2cf8cca8cb773882b28d86a5 100644 (file)
@@ -3621,17 +3621,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
     (if (integer_zerop (@2) || integer_all_onesp (@2))
      (cmp @0 @2)))))
 
-/* Both signed and unsigned lshift produce the same result, so use
-   the form that minimizes the number of conversions.  Postpone this
-   transformation until after shifts by zero have been folded.  */
+/* Narrow a lshift by constant.  */
 (simplify
- (convert (lshift:s@0 (convert:s@1 @2) INTEGER_CST@3))
+ (convert (lshift:s@0 @1 INTEGER_CST@2))
  (if (INTEGRAL_TYPE_P (type)
-      && tree_nop_conversion_p (type, TREE_TYPE (@0))
-      && INTEGRAL_TYPE_P (TREE_TYPE (@2))
-      && TYPE_PRECISION (TREE_TYPE (@2)) <= TYPE_PRECISION (type)
-      && !integer_zerop (@3))
-  (lshift (convert @2) @3)))
+      && INTEGRAL_TYPE_P (TREE_TYPE (@0))
+      && !integer_zerop (@2)
+      && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@0)))
+  (if (TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (@0))
+       || wi::ltu_p (wi::to_wide (@2), TYPE_PRECISION (type)))
+   (lshift (convert @1) @2)
+   (if (wi::ltu_p (wi::to_wide (@2), TYPE_PRECISION (TREE_TYPE (@0))))
+    { build_zero_cst (type); }))))
 
 /* Simplifications of conversions.  */
 
diff --git a/gcc/testsuite/gcc.dg/fold-convlshift-4.c b/gcc/testsuite/gcc.dg/fold-convlshift-4.c
new file mode 100644 (file)
index 0000000..001627f
--- /dev/null
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+short foo(short x)
+{
+  return x << 5;
+}
+
+/* { dg-final { scan-tree-dump-not "\\(int\\)" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "\\(short int\\)" "optimized" } } */
index d045da9ea8040dfebdcb0ba179aacbfb262504e2..a5d8bfd5838ad4fc4cd17ef5e14f02a87a5eb79e 100644 (file)
@@ -68,4 +68,4 @@ get_unaligned_16_be (unsigned char *p)
 
 
 /* { dg-final { scan-tree-dump-times "16 bit load in target endianness found at" 4 "bswap" } } */
-/* { dg-final { scan-tree-dump-times "16 bit bswap implementation found at" 5 "bswap" } } */
+/* { dg-final { scan-tree-dump-times "16 bit bswap implementation found at" 4 "bswap" } } */
index bc2126fce4ec74464b9928e42e56c057a85b1abb..38cf792aca2da8d27728b39d6100b8652f65e9be 100644 (file)
@@ -1,6 +1,6 @@
 /* PR tree-optimization/61839.  */
 /* { dg-do run } */
-/* { dg-options "-O2 -fdump-tree-vrp -fdump-tree-optimized -fdisable-tree-ethread -fdisable-tree-threadfull1" } */
+/* { dg-options "-O2 -fdump-tree-optimized -fdisable-tree-ethread -fdisable-tree-threadfull1" } */
 
 __attribute__ ((noinline))
 int foo (int a, unsigned b)
@@ -21,6 +21,4 @@ int main ()
   foo (-1, b);
 }
 
-/* Scan for c [12, 13] << 8 in function foo.  */
-/* { dg-final { scan-tree-dump-times "3072 : 3328" 1  "vrp1" } } */
 /* { dg-final { scan-tree-dump-times "3072" 0  "optimized" } } */
index 9e5f464a88f5f1f0a09baa5d805f940cc360217f..9a5141ee6ecce133ce85edcf75603e0b3ce41f04 100644 (file)
@@ -58,9 +58,7 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 2 "vect" { target vect_widen_shift } } } */
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 3} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 3} "vect" } } */
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 8} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 5} "vect" } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 
index c2d07974dfb85d55ca2fb6f493dadf2fda060b4f..f2d284ca9bee4af23c25726a54866bfaf054c46c 100644 (file)
@@ -62,9 +62,7 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 2 "vect" { target vect_widen_shift } } } */
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 3} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 3} "vect" } } */
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 8} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 5} "vect" } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 
index 37da7c917e4d79316b56f782c873db7f0ffef76a..6f89aacbebf5094c7b1081b12c7fcce1b97d536b 100644 (file)
@@ -59,9 +59,7 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 3} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 3} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 8} "vect" } } */
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 9} "vect" } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 
index 41384807a45a4028f84a4f40486b834022ca54e6..a1e1182c6067db47445ad07b77e5c6e067858488 100644 (file)
@@ -57,9 +57,7 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 3} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 3} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 8} "vect" } } */
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 9} "vect" } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 
index 514337c579cf777a6dd4d8b04f0ff5e963ba3c1f..03a6e6795ec68e5f9a35da93ca7a8d50a3012a21 100644 (file)
@@ -62,9 +62,7 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 2 "vect" { target vect_widen_shift } } } */
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 3} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 3} "vect" } } */
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 8} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 5} "vect" } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 
index 3d536d5ddcc14cfd67c141c144a1d686edace976..0ef377f1f58a6f6466380a59c381333dbc4805df 100644 (file)
@@ -66,9 +66,7 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 2 "vect" { target vect_widen_shift } } } */
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 3} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 3} "vect" } } */
-/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* << 8} "vect" } } */
 /* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 5} "vect" } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 
index 0fad4dbd0945c6c176f3457b751e812f17fcd148..8f624863971392c891fde7278949c8818f646576 100644 (file)
@@ -2614,8 +2614,7 @@ vect_recog_rotate_pattern (vec_info *vinfo,
          || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
          || TYPE_PRECISION (type) <= 16
          || TREE_CODE (oprnd0) != SSA_NAME
-         || BITS_PER_UNIT != 8
-         || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
+         || BITS_PER_UNIT != 8)
        return NULL;
 
       stmt_vec_info def_stmt_info;
@@ -2688,8 +2687,7 @@ vect_recog_rotate_pattern (vec_info *vinfo,
 
   if (TREE_CODE (oprnd0) != SSA_NAME
       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
-      || !INTEGRAL_TYPE_P (type)
-      || !TYPE_UNSIGNED (type))
+      || !INTEGRAL_TYPE_P (type))
     return NULL;
 
   stmt_vec_info def_stmt_info;
@@ -2745,31 +2743,36 @@ vect_recog_rotate_pattern (vec_info *vinfo,
        goto use_rotate;
     }
 
+  tree utype = unsigned_type_for (type);
+  tree uvectype = get_vectype_for_scalar_type (vinfo, utype);
+  if (!uvectype)
+    return NULL;
+
   /* If vector/vector or vector/scalar shifts aren't supported by the target,
      don't do anything here either.  */
-  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
-  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
+  optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_vector);
+  optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_vector);
   if (!optab1
-      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
+      || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
       || !optab2
-      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+      || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
     {
       if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
        return NULL;
-      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
-      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
+      optab1 = optab_for_tree_code (LSHIFT_EXPR, uvectype, optab_scalar);
+      optab2 = optab_for_tree_code (RSHIFT_EXPR, uvectype, optab_scalar);
       if (!optab1
-         || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
+         || optab_handler (optab1, TYPE_MODE (uvectype)) == CODE_FOR_nothing
          || !optab2
-         || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+         || optab_handler (optab2, TYPE_MODE (uvectype)) == CODE_FOR_nothing)
        return NULL;
     }
 
   *type_out = vectype;
 
-  if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
+  if (!useless_type_conversion_p (utype, TREE_TYPE (oprnd0)))
     {
-      def = vect_recog_temp_ssa_var (type, NULL);
+      def = vect_recog_temp_ssa_var (utype, NULL);
       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
       oprnd0 = def;
@@ -2779,7 +2782,7 @@ vect_recog_rotate_pattern (vec_info *vinfo,
     ext_def = vect_get_external_def_edge (vinfo, oprnd1);
 
   def = NULL_TREE;
-  scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
+  scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
   if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
     def = oprnd1;
   else if (def_stmt && gimple_assign_cast_p (def_stmt))
@@ -2793,7 +2796,7 @@ vect_recog_rotate_pattern (vec_info *vinfo,
 
   if (def == NULL_TREE)
     {
-      def = vect_recog_temp_ssa_var (type, NULL);
+      def = vect_recog_temp_ssa_var (utype, NULL);
       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
     }
@@ -2839,13 +2842,13 @@ vect_recog_rotate_pattern (vec_info *vinfo,
        append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
     }
 
-  var1 = vect_recog_temp_ssa_var (type, NULL);
+  var1 = vect_recog_temp_ssa_var (utype, NULL);
   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
                                        ? LSHIFT_EXPR : RSHIFT_EXPR,
                                  oprnd0, def);
   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
 
-  var2 = vect_recog_temp_ssa_var (type, NULL);
+  var2 = vect_recog_temp_ssa_var (utype, NULL);
   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
                                        ? RSHIFT_EXPR : LSHIFT_EXPR,
                                  oprnd0, def2);
@@ -2855,9 +2858,15 @@ vect_recog_rotate_pattern (vec_info *vinfo,
   vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
 
   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
-  var = vect_recog_temp_ssa_var (type, NULL);
+  var = vect_recog_temp_ssa_var (utype, NULL);
   pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
 
+  if (!useless_type_conversion_p (type, utype))
+    {
+      append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
+      tree result = vect_recog_temp_ssa_var (type, NULL);
+      pattern_stmt = gimple_build_assign (result, NOP_EXPR, var);
+    }
   return pattern_stmt;
 }