git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Run pass_sink_code once more before store_merging
author: Xionghu Luo <luoxhu@linux.ibm.com>
Wed, 19 May 2021 02:34:18 +0000 (21:34 -0500)
committer: Xionghu Luo <luoxhu@linux.ibm.com>
Wed, 19 May 2021 02:34:18 +0000 (21:34 -0500)
The GIMPLE sink code pass runs quite early, so later GIMPLE optimization
passes may expose new sinking opportunities.  This patch runs the sink
code pass once more before store_merging.  For a detailed discussion,
please refer to:
https://gcc.gnu.org/pipermail/gcc-patches/2020-December/562352.html

Tested SPEC2017 performance on P8LE: 544.nab_r improves by 2.43%, with
no significant changes to the other benchmarks; the GEOMEAN improves
slightly, by 0.25%.

gcc/ChangeLog:

2021-05-18  Xionghu Luo  <luoxhu@linux.ibm.com>

* passes.def: Add sink_code pass before store_merging.
* tree-ssa-sink.c (pass_sink_code::clone): New.

gcc/testsuite/ChangeLog:

2021-05-18  Xionghu Luo  <luoxhu@linux.ibm.com>

* gcc.dg/tree-ssa/ssa-sink-1.c: Adjust.
* gcc.dg/tree-ssa/ssa-sink-2.c: Ditto.
* gcc.dg/tree-ssa/ssa-sink-3.c: Ditto.
* gcc.dg/tree-ssa/ssa-sink-4.c: Ditto.
* gcc.dg/tree-ssa/ssa-sink-5.c: Ditto.
* gcc.dg/tree-ssa/ssa-sink-6.c: Ditto.
* gcc.dg/tree-ssa/ssa-sink-7.c: Ditto.
* gcc.dg/tree-ssa/ssa-sink-8.c: Ditto.
* gcc.dg/tree-ssa/ssa-sink-9.c: Ditto.
* gcc.dg/tree-ssa/ssa-sink-10.c: Ditto.
* gcc.dg/tree-ssa/ssa-sink-13.c: Ditto.
* gcc.dg/tree-ssa/ssa-sink-14.c: Ditto.
* gcc.dg/tree-ssa/ssa-sink-16.c: Ditto.
* gcc.dg/tree-ssa/ssa-sink-17.c: Ditto.
* gcc.dg/tree-ssa/ssa-sink-18.c: New.

17 files changed:
gcc/passes.def
gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-1.c
gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c
gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-13.c
gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-14.c
gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-16.c
gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-17.c
gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-18.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-2.c
gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-3.c
gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-4.c
gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-5.c
gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-6.c
gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-7.c
gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-8.c
gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-9.c
gcc/tree-ssa-sink.c

index de39fa48b3ded63910165c24b5dc9c900346c4cc..945d2bc797c00249c2f5fdcc5d4ac42bdd4648c4 100644 (file)
@@ -348,6 +348,7 @@ along with GCC; see the file COPYING3.  If not see
       NEXT_PASS (pass_phiopt, false /* early_p */);
       NEXT_PASS (pass_fold_builtins);
       NEXT_PASS (pass_optimize_widening_mul);
+      NEXT_PASS (pass_sink_code);
       NEXT_PASS (pass_store_merging);
       NEXT_PASS (pass_tail_calls);
       /* If DCE is not run before checking for uninitialized uses,
index 411585a6dc44e3b9977b4b2c06f82a8a65087e2c..57b501681f397bbdbf4e31d26c81ce13ddb2d56f 100644 (file)
@@ -7,4 +7,4 @@ foo (int a, int b, int c)
   return c ? x : a;
 }
 /* We should sink the x = a * b calculation into the branch that returns x. */
-/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink" } } */
+/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink1" } } */
index 37e4d2fe687199ffc3cf677dcdc8775c14ff0bca..535cb3208f502cfcc73c07ad1f40794d6492e6c9 100644 (file)
@@ -16,4 +16,4 @@ void foo (void)
     }
 }
 
-/* { dg-final { scan-tree-dump-times "Sinking # VUSE" 4 "sink" } } */
+/* { dg-final { scan-tree-dump-times "Sinking # VUSE" 4 "sink1" } } */
index a65ba35d4ba9d8a11f57934c2e576b8a88fd4084..584fd91f43a8155818c848034f3ac014cf1e9616 100644 (file)
@@ -21,5 +21,5 @@ void test ()
 
 /* We should sink/merge all stores and end up with a single BB.  */
 
-/* { dg-final { scan-tree-dump-times "MEM\[^\n\r\]* = 0;" 3 "sink" } } */
-/* { dg-final { scan-tree-dump-times "<bb " 1 "sink" } } */
+/* { dg-final { scan-tree-dump-times "MEM\[^\n\r\]* = 0;" 3 "sink1" } } */
+/* { dg-final { scan-tree-dump-times "<bb " 1 "sink1" } } */
index 771cd4420c4d71956c1cf183e5115e6d9d7bf3fa..f5418b06deb1b10dd359dd6223e43defa9571e63 100644 (file)
@@ -13,5 +13,5 @@ void foo (int b)
 /* We should have sunk the store and inserted a PHI to merge the
    stored values.  */
 
-/* { dg-final { scan-tree-dump-times " = PHI" 1 "sink" } } */
-/* { dg-final { scan-tree-dump-times "x = " 1 "sink" } } */
+/* { dg-final { scan-tree-dump-times " = PHI" 1 "sink1" } } */
+/* { dg-final { scan-tree-dump-times "x = " 1 "sink1" } } */
index 610c8d60ebeec421ab1e884b9b747551c8848127..012b165fbab71fdabdf9b7c51604e7728f7739d6 100644 (file)
@@ -10,5 +10,5 @@ int f(int n)
   return j;
 }
 
-/* { dg-final { scan-tree-dump "Sinking j_. = __builtin_ffs" "sink" } } */
+/* { dg-final { scan-tree-dump "Sinking j_. = __builtin_ffs" "sink1" } } */
 /* { dg-final { scan-tree-dump "return 2;" "optimized" } } */
index cf2e2a0f76641f030fda3af4277aeaf640958985..d0aeeb312cc0a1927e47270cb730fdbfaa9b8349 100644 (file)
@@ -12,4 +12,4 @@ int my_f(int a, int b)
 }
 
 /* We should sink the call to pure_f to the if block.  */
-/* { dg-final { scan-tree-dump "Sinking # VUSE" "sink" } } */
+/* { dg-final { scan-tree-dump "Sinking # VUSE" "sink1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-18.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-18.c
new file mode 100644 (file)
index 0000000..421c78e
--- /dev/null
@@ -0,0 +1,212 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+
+#include <stdint.h>
+
+#define HLOG 16
+#define        MAX_LIT        (1 <<  5)
+typedef const uint8_t *LZF_HSLOT;
+typedef LZF_HSLOT LZF_STATE[1 << (HLOG)];
+
+int
+compute_on_bytes (uint8_t *in_data, int in_len, uint8_t *out_data, int out_len)
+{
+  LZF_STATE htab;
+
+  uint8_t *ip = in_data;
+  uint8_t *op = out_data;
+  uint8_t *in_end = ip + in_len;
+  uint8_t *out_end = op + out_len;
+  uint8_t *ref;
+
+  unsigned long off;
+  unsigned int hval;
+  int lit;
+
+  if (!in_len || !out_len)
+    return 0;
+
+  lit = 0;
+  op++;
+  hval = (((ip[0]) << 8) | ip[1]);
+
+  while (ip < in_end - 2)
+    {
+      uint8_t *hslot;
+
+      hval = (((hval) << 8) | ip[2]);
+      hslot = (uint8_t*)(htab + (((hval >> (3 * 8 - 16)) - hval * 5) & ((1 << (16)) - 1)));
+
+      ref = *hslot + in_data;
+      *hslot = ip - in_data;
+
+      if (1 && (off = ip - ref - 1) < (1 << 13) && ref > in_data
+         && ref[2] == ip[2]
+         && ((ref[1] << 8) | ref[0]) == ((ip[1] << 8) | ip[0]))
+       {
+         unsigned int len = 2;
+         unsigned int maxlen = in_end - ip - len;
+         maxlen
+           = maxlen > ((1 << 8) + (1 << 3)) ? ((1 << 8) + (1 << 3)) : maxlen;
+
+         if ((op + 3 + 1 >= out_end) != 0)
+           if (op - !lit + 3 + 1 >= out_end)
+             return 0;
+
+         op[-lit - 1] = lit - 1;
+         op -= !lit;
+
+         for (;;)
+           {
+             if (maxlen > 16)
+               {
+                 len++;
+                 if (ref[len] != ip[len])
+                   break;
+                 len++;
+                 if (ref[len] != ip[len])
+                   break;
+                 len++;
+                 if (ref[len] != ip[len])
+                   break;
+                 len++;
+                 if (ref[len] != ip[len])
+                   break;
+
+                 len++;
+                 if (ref[len] != ip[len])
+                   break;
+                 len++;
+                 if (ref[len] != ip[len])
+                   break;
+                 len++;
+                 if (ref[len] != ip[len])
+                   break;
+                 len++;
+                 if (ref[len] != ip[len])
+                   break;
+
+                 len++;
+                 if (ref[len] != ip[len])
+                   break;
+                 len++;
+                 if (ref[len] != ip[len])
+                   break;
+                 len++;
+                 if (ref[len] != ip[len])
+                   break;
+                 len++;
+                 if (ref[len] != ip[len])
+                   break;
+
+                 len++;
+                 if (ref[len] != ip[len])
+                   break;
+                 len++;
+                 if (ref[len] != ip[len])
+                   break;
+                 len++;
+                 if (ref[len] != ip[len])
+                   break;
+                 len++;
+                 if (ref[len] != ip[len])
+                   break;
+               }
+
+             do
+               {
+                 len++;
+               }
+             while (len < maxlen && ip[len] == ref[len]);
+
+             break;
+           }
+
+         len -= 2;
+         ip++;
+
+         if (len < 7)
+           {
+             *op++ = (off >> 8) + (len << 5);
+           }
+         else
+           {
+             *op++ = (off >> 8) + (7 << 5);
+             *op++ = len - 7;
+           }
+         *op++ = off;
+         lit = 0;
+         op++;
+         ip += len + 1;
+
+         if (ip >= in_end - 2)
+           break;
+
+         --ip;
+         --ip;
+
+         hval = (((ip[0]) << 8) | ip[1]);
+         hval = (((hval) << 8) | ip[2]);
+         htab[(((hval >> (3 * 8 - 16)) - hval * 5) & ((1 << (16)) - 1))]
+           = (LZF_HSLOT)(ip - in_data);
+         ip++;
+
+         hval = (((hval) << 8) | ip[2]);
+         htab[(((hval >> (3 * 8 - 16)) - hval * 5) & ((1 << (16)) - 1))]
+           = (LZF_HSLOT)(ip - in_data);
+         ip++;
+       }
+      else
+       {
+         if (op >= out_end)
+           return 0;
+
+         lit++;
+         *op++ = *ip++;
+
+         if (lit == (1 << 5))
+           {
+             op[-lit - 1] = lit - 1;
+             lit = 0;
+             op++;
+           }
+       }
+    }
+  if (op + 3 > out_end) /* at most 3 bytes can be missing here */
+    return 0;
+
+  while (ip < in_end)
+    {
+      lit++;
+      *op++ = *ip++;
+      if (lit == MAX_LIT)
+       {
+         op[-lit - 1] = lit - 1; /* stop run */
+         lit = 0;
+         op++; /* start run */
+       }
+    }
+
+  op[-lit - 1] = lit - 1; /* end run */
+  op -= !lit;            /* undo run if length is zero */
+
+  return op - out_data;
+ }
+
+ /* For this case, pass sink2 sinks statements from hot loop header to loop
+    exits after gimple loop optimizations, which generates instructions executed
+    each iteration in loop, but the results are used outside of loop:
+    With -m64,
+    "Sinking _367 = (uint8_t *) _320;
+    from bb 31 to bb 90
+    Sinking _320 = _321 + ivtmp.25_326;
+    from bb 31 to bb 90
+    Sinking _321 = (unsigned long) ip_229;
+    from bb 31 to bb 90
+    Sinking len_158 = _322 + 4294967295;
+    from bb 31 to bb 33"
+    When -m32, Power and X86 will sink 3 instructions, but arm ilp32 couldn't
+    sink due to ivopts chooses two IV candidates instead of one, which is
+    expected, so this case is restricted to lp64 only so far.  */
+
+ /* { dg-final { scan-tree-dump-times "Sunk statements: 4" 1 "sink2" { target lp64 } } } */
index 6aa5a182a3aac83069ab5276a1d3fada39525298..a0b4734b1e01e75b17e10392d61ae429bba3513d 100644 (file)
@@ -9,4 +9,4 @@ bar (int a, int b, int c)
   return y;
 }
 /* We should sink the x = a * b calculation into the else branch  */
-/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink" } } */
+/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink1" } } */
index 599997e0e6bf219e88a3b5d5a18115c50a53c1cb..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644 (file)
@@ -1,15 +0,0 @@
-/* { dg-do compile } */ 
-/* { dg-options "-O2 -fdump-tree-sink-stats" } */
-extern void foo(int a);
-int
-main (int argc)
-{
-  int a;
-  a = argc + 1;
-  if (argc + 3)
-    {
-      foo (a);
-    }
-}
-/* We should sink the a = argc + 1 calculation into the if branch  */
-/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink" } } */
index 784edd2fc878dd60f19eabe9070ce35e331bd261..1e3cfa93fa859ebae7535ebe2301af003ab05c87 100644 (file)
@@ -17,4 +17,4 @@ main (int argc)
   foo2 (a);
 }
 /* We should sink the first a = b + c calculation into the else branch  */
-/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink" } } */
+/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink1" } } */
index dbdde39add669011aa661011fa7db0072a8d3a04..f04da5da9b0f71ca12e4d0a9f2c9812945d6553a 100644 (file)
@@ -44,4 +44,4 @@ void foo(int16_t runs[], uint8_t alpha[], int x, int count)
 }
 
 /* We should not sink the next_runs = runs + x calculation after the loop.  */
-/* { dg-final { scan-tree-dump-times "Sunk statements:" 0 "sink" } } */
+/* { dg-final { scan-tree-dump-times "Sunk statements:" 0 "sink1" } } */
index 1abae9f7943e1e4c585710d76ee726a2b397e49a..31f5af330f9561b4c1d9e8d8cfc83cf5bd76b389 100644 (file)
@@ -14,4 +14,4 @@ int foo(int *a, int r)
 
 /* *a = 1 should be sunk to the else block.  */
 
-/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink" } } */
+/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink1" } } */
index ec3288f4e69a0956ae5c82427b10dd526a592cf6..bd748442edc8167af53df7b0b6a36de6eeed7f06 100644 (file)
@@ -15,4 +15,4 @@ int foo(int *a, int r, short *b)
 
 /* *a = 1 should be sunk to the else block.  */
 
-/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink" } } */
+/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink1" } } */
index 48af4218fc074b802184ff85a45bc2ffcd191b4a..4b23b567fd02032142d3fa3f88be08f60e150d46 100644 (file)
@@ -24,4 +24,4 @@ int foo(int *a, int r, short *b)
 
 /* *a = 1 should be sunk into the default case.  */
 
-/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink" } } */
+/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink1" } } */
index 509a76330a4d6a59b36c26b372befbf39177cb18..32bfc81741aa244d85f0cb52484f35eda0516e98 100644 (file)
@@ -15,4 +15,4 @@ int foo(int *a, int r, int *b)
 
 /* *a = 1 should be sunk to the else block.  */
 
-/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink" } } */
+/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink1" } } */
index d33e56ef915d07fd830d1e20de05311aa9647797..d252cbb5c518632477ed84650ec8f256244c618f 100644 (file)
@@ -819,6 +819,7 @@ public:
   /* opt_pass methods: */
   virtual bool gate (function *) { return flag_tree_sink != 0; }
   virtual unsigned int execute (function *);
+  opt_pass *clone (void) { return new pass_sink_code (m_ctxt); }
 
 }; // class pass_sink_code