From: Xionghu Luo Date: Wed, 19 May 2021 02:34:18 +0000 (-0500) Subject: Run pass_sink_code once more before store_merging X-Git-Tag: basepoints/gcc-13~7416 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=de56f95afaaa22c67cbeec780921d63e8b34514e;p=thirdparty%2Fgcc.git Run pass_sink_code once more before store_merging The gimple sink code pass runs quite early, so later gimple optimization passes may expose new sinking opportunities; this patch runs the sink code pass once more before store_merging. For detailed discussion, please refer to: https://gcc.gnu.org/pipermail/gcc-patches/2020-December/562352.html Tested the SPEC2017 performance on P8LE: 544.nab_r is improved by 2.43%, with no big changes to other cases; GEOMEAN is improved slightly, by 0.25%. gcc/ChangeLog: 2021-05-18 Xionghu Luo * passes.def: Add sink_code pass before store_merging. * tree-ssa-sink.c (pass_sink_code::clone): New. gcc/testsuite/ChangeLog: 2021-05-18 Xionghu Luo * gcc.dg/tree-ssa/ssa-sink-1.c: Adjust. * gcc.dg/tree-ssa/ssa-sink-2.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-3.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-4.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-5.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-6.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-7.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-8.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-9.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-10.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-13.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-14.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-16.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-17.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-18.c: New. --- diff --git a/gcc/passes.def b/gcc/passes.def index de39fa48b3de..945d2bc797c0 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -348,6 +348,7 @@ along with GCC; see the file COPYING3. 
If not see NEXT_PASS (pass_phiopt, false /* early_p */); NEXT_PASS (pass_fold_builtins); NEXT_PASS (pass_optimize_widening_mul); + NEXT_PASS (pass_sink_code); NEXT_PASS (pass_store_merging); NEXT_PASS (pass_tail_calls); /* If DCE is not run before checking for uninitialized uses, diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-1.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-1.c index 411585a6dc44..57b501681f39 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-1.c @@ -7,4 +7,4 @@ foo (int a, int b, int c) return c ? x : a; } /* We should sink the x = a * b calculation into the branch that returns x. */ -/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink" } } */ +/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c index 37e4d2fe6871..535cb3208f50 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c @@ -16,4 +16,4 @@ void foo (void) } } -/* { dg-final { scan-tree-dump-times "Sinking # VUSE" 4 "sink" } } */ +/* { dg-final { scan-tree-dump-times "Sinking # VUSE" 4 "sink1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-13.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-13.c index a65ba35d4ba9..584fd91f43a8 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-13.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-13.c @@ -21,5 +21,5 @@ void test () /* We should sink/merge all stores and end up with a single BB. 
*/ -/* { dg-final { scan-tree-dump-times "MEM\[^\n\r\]* = 0;" 3 "sink" } } */ -/* { dg-final { scan-tree-dump-times " + +#define HLOG 16 +#define MAX_LIT (1 << 5) +typedef const uint8_t *LZF_HSLOT; +typedef LZF_HSLOT LZF_STATE[1 << (HLOG)]; + +int +compute_on_bytes (uint8_t *in_data, int in_len, uint8_t *out_data, int out_len) +{ + LZF_STATE htab; + + uint8_t *ip = in_data; + uint8_t *op = out_data; + uint8_t *in_end = ip + in_len; + uint8_t *out_end = op + out_len; + uint8_t *ref; + + unsigned long off; + unsigned int hval; + int lit; + + if (!in_len || !out_len) + return 0; + + lit = 0; + op++; + hval = (((ip[0]) << 8) | ip[1]); + + while (ip < in_end - 2) + { + uint8_t *hslot; + + hval = (((hval) << 8) | ip[2]); + hslot = (uint8_t*)(htab + (((hval >> (3 * 8 - 16)) - hval * 5) & ((1 << (16)) - 1))); + + ref = *hslot + in_data; + *hslot = ip - in_data; + + if (1 && (off = ip - ref - 1) < (1 << 13) && ref > in_data + && ref[2] == ip[2] + && ((ref[1] << 8) | ref[0]) == ((ip[1] << 8) | ip[0])) + { + unsigned int len = 2; + unsigned int maxlen = in_end - ip - len; + maxlen + = maxlen > ((1 << 8) + (1 << 3)) ? 
((1 << 8) + (1 << 3)) : maxlen; + + if ((op + 3 + 1 >= out_end) != 0) + if (op - !lit + 3 + 1 >= out_end) + return 0; + + op[-lit - 1] = lit - 1; + op -= !lit; + + for (;;) + { + if (maxlen > 16) + { + len++; + if (ref[len] != ip[len]) + break; + len++; + if (ref[len] != ip[len]) + break; + len++; + if (ref[len] != ip[len]) + break; + len++; + if (ref[len] != ip[len]) + break; + + len++; + if (ref[len] != ip[len]) + break; + len++; + if (ref[len] != ip[len]) + break; + len++; + if (ref[len] != ip[len]) + break; + len++; + if (ref[len] != ip[len]) + break; + + len++; + if (ref[len] != ip[len]) + break; + len++; + if (ref[len] != ip[len]) + break; + len++; + if (ref[len] != ip[len]) + break; + len++; + if (ref[len] != ip[len]) + break; + + len++; + if (ref[len] != ip[len]) + break; + len++; + if (ref[len] != ip[len]) + break; + len++; + if (ref[len] != ip[len]) + break; + len++; + if (ref[len] != ip[len]) + break; + } + + do + { + len++; + } + while (len < maxlen && ip[len] == ref[len]); + + break; + } + + len -= 2; + ip++; + + if (len < 7) + { + *op++ = (off >> 8) + (len << 5); + } + else + { + *op++ = (off >> 8) + (7 << 5); + *op++ = len - 7; + } + *op++ = off; + lit = 0; + op++; + ip += len + 1; + + if (ip >= in_end - 2) + break; + + --ip; + --ip; + + hval = (((ip[0]) << 8) | ip[1]); + hval = (((hval) << 8) | ip[2]); + htab[(((hval >> (3 * 8 - 16)) - hval * 5) & ((1 << (16)) - 1))] + = (LZF_HSLOT)(ip - in_data); + ip++; + + hval = (((hval) << 8) | ip[2]); + htab[(((hval >> (3 * 8 - 16)) - hval * 5) & ((1 << (16)) - 1))] + = (LZF_HSLOT)(ip - in_data); + ip++; + } + else + { + if (op >= out_end) + return 0; + + lit++; + *op++ = *ip++; + + if (lit == (1 << 5)) + { + op[-lit - 1] = lit - 1; + lit = 0; + op++; + } + } + } + if (op + 3 > out_end) /* at most 3 bytes can be missing here */ + return 0; + + while (ip < in_end) + { + lit++; + *op++ = *ip++; + if (lit == MAX_LIT) + { + op[-lit - 1] = lit - 1; /* stop run */ + lit = 0; + op++; /* start run */ + } + } + + 
op[-lit - 1] = lit - 1; /* end run */ + op -= !lit; /* undo run if length is zero */ + + return op - out_data; + } + + /* For this case, pass sink2 sinks statements from hot loop header to loop + exits after gimple loop optimizations, which generates instructions executed + each iteration in loop, but the results are used outside of loop: + With -m64, + "Sinking _367 = (uint8_t *) _320; + from bb 31 to bb 90 + Sinking _320 = _321 + ivtmp.25_326; + from bb 31 to bb 90 + Sinking _321 = (unsigned long) ip_229; + from bb 31 to bb 90 + Sinking len_158 = _322 + 4294967295; + from bb 31 to bb 33" + When -m32, Power and X86 will sink 3 instructions, but arm ilp32 couldn't + sink due to ivopts chooses two IV candidates instead of one, which is + expected, so this case is restricted to lp64 only so far. */ + + /* { dg-final { scan-tree-dump-times "Sunk statements: 4" 1 "sink2" { target lp64 } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-2.c index 6aa5a182a3aa..a0b4734b1e01 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-2.c @@ -9,4 +9,4 @@ bar (int a, int b, int c) return y; } /* We should sink the x = a * b calculation into the else branch */ -/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink" } } */ +/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-3.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-3.c index 599997e0e6bf..e69de29bb2d1 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-3.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-3.c @@ -1,15 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-sink-stats" } */ -extern void foo(int a); -int -main (int argc) -{ - int a; - a = argc + 1; - if (argc + 3) - { - foo (a); - } -} -/* We should sink the a = argc + 1 calculation into the if branch */ -/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink" } 
} */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-4.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-4.c index 784edd2fc878..1e3cfa93fa85 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-4.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-4.c @@ -17,4 +17,4 @@ main (int argc) foo2 (a); } /* We should sink the first a = b + c calculation into the else branch */ -/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink" } } */ +/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-5.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-5.c index dbdde39add66..f04da5da9b0f 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-5.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-5.c @@ -44,4 +44,4 @@ void foo(int16_t runs[], uint8_t alpha[], int x, int count) } /* We should not sink the next_runs = runs + x calculation after the loop. */ -/* { dg-final { scan-tree-dump-times "Sunk statements:" 0 "sink" } } */ +/* { dg-final { scan-tree-dump-times "Sunk statements:" 0 "sink1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-6.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-6.c index 1abae9f7943e..31f5af330f95 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-6.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-6.c @@ -14,4 +14,4 @@ int foo(int *a, int r) /* *a = 1 should be sunk to the else block. */ -/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink" } } */ +/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-7.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-7.c index ec3288f4e69a..bd748442edc8 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-7.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-7.c @@ -15,4 +15,4 @@ int foo(int *a, int r, short *b) /* *a = 1 should be sunk to the else block. 
*/ -/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink" } } */ +/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-8.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-8.c index 48af4218fc07..4b23b567fd02 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-8.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-8.c @@ -24,4 +24,4 @@ int foo(int *a, int r, short *b) /* *a = 1 should be sunk into the default case. */ -/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink" } } */ +/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-9.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-9.c index 509a76330a4d..32bfc81741aa 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-9.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-9.c @@ -15,4 +15,4 @@ int foo(int *a, int r, int *b) /* *a = 1 should be sunk to the else block. */ -/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink" } } */ +/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink1" } } */ diff --git a/gcc/tree-ssa-sink.c b/gcc/tree-ssa-sink.c index d33e56ef915d..d252cbb5c518 100644 --- a/gcc/tree-ssa-sink.c +++ b/gcc/tree-ssa-sink.c @@ -819,6 +819,7 @@ public: /* opt_pass methods: */ virtual bool gate (function *) { return flag_tree_sink != 0; } virtual unsigned int execute (function *); + opt_pass *clone (void) { return new pass_sink_code (m_ctxt); } }; // class pass_sink_code