From: Andrey Belevantsev Date: Thu, 7 Apr 2011 07:04:02 +0000 (+0400) Subject: backport: re PR rtl-optimization/45352 (ICE: in reset_sched_cycles_in_current_ebb... X-Git-Tag: releases/gcc-4.5.3~98 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f3f282fafa56a45f35e7142ecd15decb86c2d44f;p=thirdparty%2Fgcc.git backport: re PR rtl-optimization/45352 (ICE: in reset_sched_cycles_in_current_ebb, at sel-sched.c:7058) Backport from mainline 2011-01-13 Andrey Belevantsev PR rtl-optimization/45352 * sel-sched.c: Update copyright years. (reset_sched_cycles_in_current_ebb): Also recheck the DFA state in the advancing loop when we have issued issue_rate insns. Backport from mainline 2010-12-22 Andrey Belevantsev PR rtl-optimization/45352 PR rtl-optimization/46521 PR rtl-optimization/46522 * sel-sched.c (reset_sched_cycles_in_current_ebb): Recheck the DFA state on the last iteration of the advancing loop. (sel_sched_region_1): Propagate the rescheduling bit to the next block also for empty blocks. Backport from mainline 2010-11-08 Andrey Belevantsev PR rtl-optimization/45352 * sel-sched.c (find_best_expr): Do not set pneed_stall when the variable_issue hook is not implemented. (fill_insns): Remove dead variable stall_iterations. (init_seqno_1): Force EBB start for resetting sched cycles on any successor blocks of the rescheduled region. (sel_sched_region_1): Use bitmap_bit_p instead of bitmap_clear_bit. (reset_sched_cycles_in_current_ebb): Add debug printing. New variable issued_insns. Advance state when we have issued issue_rate insns. From-SVN: r172088 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index aa1f621ad1a8..4b39125c8781 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,38 @@ +2011-04-07 Andrey Belevantsev + + Backport from mainline + 2011-01-13 Andrey Belevantsev + + PR rtl-optimization/45352 + * sel-sched.c: Update copyright years. + (reset_sched_cycles_in_current_ebb): Also recheck the DFA state + in the advancing loop when we have issued issue_rate insns. + + Backport from mainline + 2010-12-22 Andrey Belevantsev + + PR rtl-optimization/45352 + PR rtl-optimization/46521 + PR rtl-optimization/46522 + * sel-sched.c (reset_sched_cycles_in_current_ebb): Recheck the DFA state + on the last iteration of the advancing loop. + (sel_sched_region_1): Propagate the rescheduling bit to the next block + also for empty blocks. + + Backport from mainline + 2010-11-08 Andrey Belevantsev + + PR rtl-optimization/45352 + * sel-sched.c (find_best_expr): Do not set pneed_stall when + the variable_issue hook is not implemented. + (fill_insns): Remove dead variable stall_iterations. + (init_seqno_1): Force EBB start for resetting sched cycles on any + successor blocks of the rescheduled region. + (sel_sched_region_1): Use bitmap_bit_p instead of bitmap_clear_bit. + (reset_sched_cycles_in_current_ebb): Add debug printing. + New variable issued_insns. Advance state when we have issued + issue_rate insns. + 2011-04-07 Andrey Belevantsev Backport from mainline diff --git a/gcc/sel-sched.c b/gcc/sel-sched.c index bbfbcf035075..9abe66dc4b06 100644 --- a/gcc/sel-sched.c +++ b/gcc/sel-sched.c @@ -1,5 +1,6 @@ /* Instruction scheduling pass. Selective scheduler and pipeliner. - Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 + Free Software Foundation, Inc. This file is part of GCC. @@ -4404,7 +4405,8 @@ find_best_expr (av_set_t *av_vliw_ptr, blist_t bnds, fence_t fence, { can_issue_more = invoke_aftermath_hooks (fence, EXPR_INSN_RTX (best), can_issue_more); - if (can_issue_more == 0) + if (targetm.sched.variable_issue + && can_issue_more == 0) *pneed_stall = 1; } @@ -5515,7 +5517,7 @@ fill_insns (fence_t fence, int seqno, ilist_t **scheduled_insns_tailpp) blist_t *bnds_tailp1, *bndsp; expr_t expr_vliw; int need_stall; - int was_stall = 0, scheduled_insns = 0, stall_iterations = 0; + int was_stall = 0, scheduled_insns = 0; int max_insns = pipelining_p ? issue_rate : 2 * issue_rate; int max_stall = pipelining_p ? 1 : 3; bool last_insn_was_debug = false; @@ -5534,16 +5536,15 @@ fill_insns (fence_t fence, int seqno, ilist_t **scheduled_insns_tailpp) do { expr_vliw = find_best_expr (&av_vliw, bnds, fence, &need_stall); - if (!expr_vliw && need_stall) + if (! expr_vliw && need_stall) { /* All expressions required a stall. Do not recompute av sets as we'll get the same answer (modulo the insns between the fence and its boundary, which will not be available for - pipelining). */ - gcc_assert (! expr_vliw && stall_iterations < 2); - was_stall++; - /* If we are going to stall for too long, break to recompute av + pipelining). + If we are going to stall for too long, break to recompute av sets and bring more insns for pipelining. */ + was_stall++; if (need_stall <= 3) stall_for_cycles (fence, need_stall); else @@ -6718,6 +6719,8 @@ init_seqno_1 (basic_block bb, sbitmap visited_bbs, bitmap blocks_to_reschedule) init_seqno_1 (succ, visited_bbs, blocks_to_reschedule); } + else if (blocks_to_reschedule) + bitmap_set_bit (forced_ebb_heads, succ->index); } for (insn = BB_END (bb); insn != note; insn = PREV_INSN (insn)) @@ -6972,6 +6975,7 @@ reset_sched_cycles_in_current_ebb (void) int last_clock = 0; int haifa_last_clock = -1; int haifa_clock = 0; + int issued_insns = 0; insn_t insn; if (targetm.sched.md_init) @@ -6990,7 +6994,7 @@ reset_sched_cycles_in_current_ebb (void) { int cost, haifa_cost; int sort_p; - bool asm_p, real_insn, after_stall; + bool asm_p, real_insn, after_stall, all_issued; int clock; if (!INSN_P (insn)) @@ -7026,7 +7030,9 @@ reset_sched_cycles_in_current_ebb (void) haifa_cost = cost; after_stall = 1; } - + all_issued = issued_insns == issue_rate; + if (haifa_cost == 0 && all_issued) + haifa_cost = 1; if (haifa_cost > 0) { int i = 0; @@ -7034,6 +7040,7 @@ reset_sched_cycles_in_current_ebb (void) while (haifa_cost--) { advance_state (curr_state); + issued_insns = 0; i++; if (sched_verbose >= 2) @@ -7050,9 +7057,22 @@ reset_sched_cycles_in_current_ebb (void) && haifa_cost > 0 && estimate_insn_cost (insn, curr_state) == 0) break; - } + + /* When the data dependency stall is longer than the DFA stall, + and when we have issued exactly issue_rate insns and stalled, + it could be that after this longer stall the insn will again + become unavailable to the DFA restrictions. Looks strange + but happens e.g. on x86-64. So recheck DFA on the last + iteration. */ + if ((after_stall || all_issued) + && real_insn + && haifa_cost == 0) + haifa_cost = estimate_insn_cost (insn, curr_state); + } haifa_clock += i; + if (sched_verbose >= 2) + sel_print ("haifa clock: %d\n", haifa_clock); } else gcc_assert (haifa_cost == 0); @@ -7066,21 +7086,27 @@ reset_sched_cycles_in_current_ebb (void) &sort_p)) { advance_state (curr_state); + issued_insns = 0; haifa_clock++; if (sched_verbose >= 2) { sel_print ("advance_state (dfa_new_cycle)\n"); debug_state (curr_state); + sel_print ("haifa clock: %d\n", haifa_clock + 1); } } if (real_insn) { cost = state_transition (curr_state, insn); + issued_insns++; if (sched_verbose >= 2) - debug_state (curr_state); - + { + sel_print ("scheduled insn %d, clock %d\n", INSN_UID (insn), + haifa_clock + 1); + debug_state (curr_state); + } gcc_assert (cost < 0); } @@ -7493,21 +7519,23 @@ sel_sched_region_1 (void) { basic_block bb = EBB_FIRST_BB (i); - if (sel_bb_empty_p (bb)) - { - bitmap_clear_bit (blocks_to_reschedule, bb->index); - continue; - } - if (bitmap_bit_p (blocks_to_reschedule, bb->index)) { + if (! bb_ends_ebb_p (bb)) + bitmap_set_bit (blocks_to_reschedule, bb_next_bb (bb)->index); + if (sel_bb_empty_p (bb)) + { + bitmap_clear_bit (blocks_to_reschedule, bb->index); + continue; + } clear_outdated_rtx_info (bb); if (sel_insn_is_speculation_check (BB_END (bb)) && JUMP_P (BB_END (bb))) bitmap_set_bit (blocks_to_reschedule, BRANCH_EDGE (bb)->dest->index); } - else if (INSN_SCHED_TIMES (sel_bb_head (bb)) <= 0) + else if (! sel_bb_empty_p (bb) + && INSN_SCHED_TIMES (sel_bb_head (bb)) <= 0) bitmap_set_bit (blocks_to_reschedule, bb->index); } diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b6eab437fa55..ac7639b0a2d5 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,28 @@ +2011-04-07 Andrey Belevantsev + + Backport from mainline + 2011-01-13 Andrey Belevantsev + + PR rtl-optimization/45352 + * gcc.dg/pr45352-3.c: New. + + Backport from mainline + 2010-12-22 Andrey Belevantsev + + PR rtl-optimization/45352 + PR rtl-optimization/46521 + PR rtl-optimization/46522 + * gcc.dg/pr46521.c: New. + * gcc.dg/pr46522.c: New. + + Backport from mainline + 2010-11-08 Andrey Belevantsev + + PR rtl-optimization/45352 + gcc.dg/pr45352.c, gcc.dg/pr45352-1.c, gcc.dg/pr45352-2.c: New tests. + gcc.target/i386/pr45352.c, gcc.target/i386/pr45352-1.c, + gcc.target/i386/pr45352-2.c: New tests. + 2011-04-07 Andrey Belevantsev Backport from mainline diff --git a/gcc/testsuite/gcc.dg/pr45352-1.c b/gcc/testsuite/gcc.dg/pr45352-1.c new file mode 100644 index 000000000000..3b092cd13ed5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr45352-1.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */ +/* { dg-options "-O3 -fschedule-insns -fschedule-insns2 -fselective-scheduling2 -fsel-sched-pipelining -funroll-loops -fprefetch-loop-arrays" } */ + +void main1 (float *pa, float *pc) +{ + int i; + float b[256]; + float c[256]; + for (i = 0; i < 256; i++) + b[i] = c[i] = pc[i]; + for (i = 0; i < 256; i++) + pa[i] = b[i] * c[i]; +} diff --git a/gcc/testsuite/gcc.dg/pr45352-2.c b/gcc/testsuite/gcc.dg/pr45352-2.c new file mode 100644 index 000000000000..eed3847177c0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr45352-2.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */ +/* { dg-options "-O1 -freorder-blocks -fschedule-insns2 -funswitch-loops -fselective-scheduling2 -fsel-sched-pipelining -funroll-all-loops" } */ +void +foo1 (int *s) +{ + s[0] = s[1]; + while (s[6] - s[8]) + { + s[6] -= s[8]; + if (s[8] || s[0]) + { + s[3] += s[0]; + s[4] += s[1]; + } + s[7]++; + } +} diff --git a/gcc/testsuite/gcc.dg/pr45352-3.c b/gcc/testsuite/gcc.dg/pr45352-3.c new file mode 100644 index 000000000000..ce7879f860a4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr45352-3.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */ +/* { dg-options "-O -fprofile-generate -fgcse -fno-gcse-lm -fgcse-sm -fno-ivopts -fno-tree-loop-im -ftree-pre -funroll-loops -fno-web -fschedule-insns2 -fselective-scheduling2 -fsel-sched-pipelining" } */ + +extern volatile float f[]; + +void foo (void) +{ + int i; + for (i = 0; i < 100; i++) + f[i] = 0; + for (i = 0; i < 100; i++) + f[i] = 0; + for (i = 0; i < 100; i++) + if (f[i]) + __builtin_abort (); +} diff --git a/gcc/testsuite/gcc.dg/pr45352.c b/gcc/testsuite/gcc.dg/pr45352.c new file mode 100644 index 000000000000..75f9a2137063 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr45352.c @@ -0,0 +1,24 @@ +/* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */ +/* { dg-options "-Os -fselective-scheduling2 -fsel-sched-pipelining -fprofile-generate" } */ + +static inline void +bmp_iter_next (int *bi, int *bit_no) +{ + *bi >>= 1; + *bit_no += 1; +} + +int bmp_iter_set (int *bi, int *bit_no); +void bitmap_initialize_stat (int, ...); +void bitmap_clear (void); + +void +df_md_alloc (int bi, int bb_index, void *bb_info) +{ + for (; bmp_iter_set (&bi, &bb_index); bmp_iter_next (&bi, &bb_index)) + + if (bb_info) + bitmap_clear (); + else + bitmap_initialize_stat (0); +} diff --git a/gcc/testsuite/gcc.dg/pr46521.c b/gcc/testsuite/gcc.dg/pr46521.c new file mode 100644 index 000000000000..0c41c4328f88 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr46521.c @@ -0,0 +1,20 @@ +/* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */ +/* { dg-options "-Os -fselective-scheduling2 -fsel-sched-pipelining -fprofile-generate -fno-early-inlining" } */ + +static void bmp_iter_next (int *bi) +{ + *bi >>= 1; +} + +int bmp_iter_set (int *, int); +void bitmap_clear (void); +void bitmap_initialize_stat (void); + +void df_md_alloc (int bi, int bb_index, int bb_info) +{ + for (; bmp_iter_set (&bi, bb_index); bmp_iter_next (&bi)) + if (bb_info) + bitmap_clear (); + else + bitmap_initialize_stat (); +} diff --git a/gcc/testsuite/gcc.dg/pr46522.c b/gcc/testsuite/gcc.dg/pr46522.c new file mode 100644 index 000000000000..13a5aa9da575 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr46522.c @@ -0,0 +1,33 @@ +/* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */ +/* { dg-options "-O3 -fkeep-inline-functions -fsel-sched-pipelining -fselective-scheduling2 -funroll-loops" } */ + +struct S +{ + unsigned i, j; +}; + +static inline void +bar (struct S *s) +{ + if (s->i++ == 1) + { + s->i = 0; + s->j++; + } +} + +void +foo1 (struct S *s) +{ + bar (s); +} + +void +foo2 (struct S s1, struct S s2, int i) +{ + while (s1.i != s2.i) { + if (i) + *(unsigned *) 0 |= (1U << s1.i); + bar (&s1); + } +} diff --git a/gcc/testsuite/gcc.target/i386/pr45352-1.c b/gcc/testsuite/gcc.target/i386/pr45352-1.c new file mode 100644 index 000000000000..5cd1bd842d80 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr45352-1.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-mtune=amdfam10 -O3 -fpeel-loops -fselective-scheduling2 -fsel-sched-pipelining -fPIC" } */ + +static int FIR_Tab_16[16][16]; + +void +V_Pass_Avrg_16_C_ref (int *Dst, int *Src, int W, int BpS, int Rnd) +{ + while (W-- > 0) + { + int i, k; + int Sums[16] = { }; + for (i = 0; i < 16; ++i) + for (k = 0; k < 16; ++k) + Sums[k] += FIR_Tab_16[i][k] * Src[i]; + for (i = 0; i < 16; ++i) + Dst[i] = Sums[i] + Src[i]; + } +} diff --git a/gcc/testsuite/gcc.target/i386/pr45352-2.c b/gcc/testsuite/gcc.target/i386/pr45352-2.c new file mode 100644 index 000000000000..5f9ebb18fc81 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr45352-2.c @@ -0,0 +1,109 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -mtune=amdfam10 -fexpensive-optimizations -fgcse -foptimize-register-move -freorder-blocks -fschedule-insns2 -funswitch-loops -fgcse-las -fselective-scheduling2 -fsel-sched-pipelining -funroll-all-loops" } */ + +typedef char uint8_t; +typedef uint32_t; +typedef vo_frame_t; +struct vo_frame_s +{ + uint8_t base[3]; + int pitches[3];}; +typedef struct +{ +void + (*proc_macro_block) + (void); +} +xine_xvmc_t; +typedef struct +{ + uint8_t ref[2][3]; +int pmv; +} +motion_t; +typedef struct +{ + uint32_t bitstream_buf; + int bitstream_bits; + uint8_t * bitstream_ptr; + uint8_t dest[3]; + int pitches[3]; + int offset; + motion_t b_motion; + motion_t f_motion; + int v_offset; + int coded_picture_width; + int picture_structure; +struct vo_frame_s *current_frame;} +picture_t; +typedef struct +{ +int xvmc_last_slice_code;} +mpeg2dec_accel_t; +static int bitstream_init (picture_t * picture, void *start) +{ + picture->bitstream_ptr = start; + return (int) (long) start; +} +static slice_xvmc_init (picture_t * picture, int code) +{ + int offset; + struct vo_frame_s *forward_reference_frame; + offset = picture->picture_structure == 2; + picture->pitches[0] = picture->current_frame->pitches[0]; + picture->pitches[1] = picture->current_frame->pitches[1]; + if (picture) + picture->f_motion.ref + [0] + [0] + = (char) (long) (forward_reference_frame->base + (offset ? picture->pitches[0] : 0)); + picture->f_motion.ref[0][1] = (offset); + if (picture->picture_structure) + picture->pitches[0] <<= picture->pitches[1] <<= 1; + offset = 0; + while (1) + { + if (picture->bitstream_buf >= 0x08000000) + break; + switch (picture->bitstream_buf >> 12) + { + case 8: + offset += 33; + picture->bitstream_buf + |= + picture->bitstream_ptr[1] << picture->bitstream_bits; + } + } + picture->offset = (offset); + while (picture->offset - picture->coded_picture_width >= 0) + { + picture->offset -= picture->coded_picture_width; + if (picture->current_frame) + { + picture->dest[0] += picture->pitches[0]; + picture->dest[1] += picture->pitches[1]; + } + picture->v_offset += 16; + } +} + +void +mpeg2_xvmc_slice + (mpeg2dec_accel_t * accel, picture_t * picture, int code, uint8_t buffer,int mba_inc) +{ + xine_xvmc_t * xvmc = (xine_xvmc_t *) (long) bitstream_init (picture, (void *) (long) buffer); + slice_xvmc_init (picture, code); + while (1) + { + if (picture) + break; + switch (picture->bitstream_buf) + { + case 8: + mba_inc += accel->xvmc_last_slice_code = code; + xvmc->proc_macro_block (); + while (mba_inc) + ; + } + } +} diff --git a/gcc/testsuite/gcc.target/i386/pr45352.c b/gcc/testsuite/gcc.target/i386/pr45352.c new file mode 100644 index 000000000000..ef710ce6b912 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr45352.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=amdfam10 -fselective-scheduling2 -fsel-sched-pipelining -funroll-all-loops" } */ + +struct S +{ + struct + { + int i; + } **p; + int x; + int y; +}; + +extern int baz (void); +extern int bar (void *, int, int); + +void +foo (struct S *s) +{ + int i; + for (i = 0; i < s->x; i++) + bar (s->p[i], baz (), s->y); + for (i = 0; i < s->x; i++) + s->p[i]->i++; +}