git.ipfire.org Git - thirdparty/gcc.git/commitdiff
backport: re PR rtl-optimization/45352 (ICE: in reset_sched_cycles_in_current_ebb...
author Andrey Belevantsev <abel@ispras.ru>
Thu, 7 Apr 2011 07:04:02 +0000 (11:04 +0400)
committer Andrey Belevantsev <abel@gcc.gnu.org>
Thu, 7 Apr 2011 07:04:02 +0000 (11:04 +0400)
        Backport from mainline
        2011-01-13  Andrey Belevantsev  <abel@ispras.ru>

        PR rtl-optimization/45352
        * sel-sched.c: Update copyright years.
        (reset_sched_cycles_in_current_ebb): Also recheck the DFA state
        in the advancing loop when we have issued issue_rate insns.

        Backport from mainline
        2010-12-22  Andrey Belevantsev  <abel@ispras.ru>

        PR rtl-optimization/45352
        PR rtl-optimization/46521
        PR rtl-optimization/46522
        * sel-sched.c (reset_sched_cycles_in_current_ebb): Recheck the DFA state
        on the last iteration of the advancing loop.
        (sel_sched_region_1): Propagate the rescheduling bit to the next block
        also for empty blocks.

        Backport from mainline
        2010-11-08  Andrey Belevantsev  <abel@ispras.ru>

        PR rtl-optimization/45352
        * sel-sched.c (find_best_expr): Do not set pneed_stall when
        the variable_issue hook is not implemented.
        (fill_insns): Remove dead variable stall_iterations.
        (init_seqno_1): Force EBB start for resetting sched cycles on any
        successor blocks of the rescheduled region.
        (sel_sched_region_1): Use bitmap_bit_p instead of bitmap_clear_bit.
        (reset_sched_cycles_in_current_ebb): Add debug printing.
        New variable issued_insns.  Advance state when we have issued
        issue_rate insns.

From-SVN: r172088

12 files changed:
gcc/ChangeLog
gcc/sel-sched.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/pr45352-1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/pr45352-2.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/pr45352-3.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/pr45352.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/pr46521.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/pr46522.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr45352-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr45352-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr45352.c [new file with mode: 0644]

index aa1f621ad1a87f9e7e8a6f0576803ad327f94cd9..4b39125c8781b68f225a05294a446d1a4427b369 100644 (file)
@@ -1,3 +1,38 @@
+2011-04-07  Andrey Belevantsev  <abel@ispras.ru>
+
+       Backport from mainline
+       2011-01-13  Andrey Belevantsev  <abel@ispras.ru>
+
+       PR rtl-optimization/45352
+       * sel-sched.c: Update copyright years.
+       (reset_sched_cycles_in_current_ebb): Also recheck the DFA state
+       in the advancing loop when we have issued issue_rate insns.
+
+       Backport from mainline
+       2010-12-22  Andrey Belevantsev  <abel@ispras.ru>
+
+       PR rtl-optimization/45352
+       PR rtl-optimization/46521
+       PR rtl-optimization/46522
+       * sel-sched.c (reset_sched_cycles_in_current_ebb): Recheck the DFA state
+       on the last iteration of the advancing loop.
+       (sel_sched_region_1): Propagate the rescheduling bit to the next block
+       also for empty blocks.
+
+       Backport from mainline
+       2010-11-08  Andrey Belevantsev  <abel@ispras.ru>
+
+       PR rtl-optimization/45352
+       * sel-sched.c (find_best_expr): Do not set pneed_stall when
+       the variable_issue hook is not implemented.
+       (fill_insns): Remove dead variable stall_iterations.
+       (init_seqno_1): Force EBB start for resetting sched cycles on any
+       successor blocks of the rescheduled region.
+       (sel_sched_region_1): Use bitmap_bit_p instead of bitmap_clear_bit.
+       (reset_sched_cycles_in_current_ebb): Add debug printing.
+       New variable issued_insns.  Advance state when we have issued
+       issue_rate insns.
+
 2011-04-07  Andrey Belevantsev  <abel@ispras.ru>
 
        Backport from mainline
index bbfbcf0350758e309927fac110e938cb720c8cce..9abe66dc4b06009f72ea6c5c5c8b858df172e2a2 100644 (file)
@@ -1,5 +1,6 @@
 /* Instruction scheduling pass.  Selective scheduler and pipeliner.
-   Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
+   Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -4404,7 +4405,8 @@ find_best_expr (av_set_t *av_vliw_ptr, blist_t bnds, fence_t fence,
     {
       can_issue_more = invoke_aftermath_hooks (fence, EXPR_INSN_RTX (best),
                                                can_issue_more);
-      if (can_issue_more == 0)
+      if (targetm.sched.variable_issue
+         && can_issue_more == 0)
         *pneed_stall = 1;
     }
 
@@ -5515,7 +5517,7 @@ fill_insns (fence_t fence, int seqno, ilist_t **scheduled_insns_tailpp)
       blist_t *bnds_tailp1, *bndsp;
       expr_t expr_vliw;
       int need_stall;
-      int was_stall = 0, scheduled_insns = 0, stall_iterations = 0;
+      int was_stall = 0, scheduled_insns = 0;
       int max_insns = pipelining_p ? issue_rate : 2 * issue_rate;
       int max_stall = pipelining_p ? 1 : 3;
       bool last_insn_was_debug = false;
@@ -5534,16 +5536,15 @@ fill_insns (fence_t fence, int seqno, ilist_t **scheduled_insns_tailpp)
       do
         {
           expr_vliw = find_best_expr (&av_vliw, bnds, fence, &need_stall);
-          if (!expr_vliw && need_stall)
+          if (! expr_vliw && need_stall)
             {
               /* All expressions required a stall.  Do not recompute av sets
                  as we'll get the same answer (modulo the insns between
                  the fence and its boundary, which will not be available for
-                 pipelining).  */
-              gcc_assert (! expr_vliw && stall_iterations < 2);
-              was_stall++;
-             /* If we are going to stall for too long, break to recompute av
+                 pipelining).
+                If we are going to stall for too long, break to recompute av
                 sets and bring more insns for pipelining.  */
+              was_stall++;
              if (need_stall <= 3)
                stall_for_cycles (fence, need_stall);
              else
@@ -6718,6 +6719,8 @@ init_seqno_1 (basic_block bb, sbitmap visited_bbs, bitmap blocks_to_reschedule)
 
          init_seqno_1 (succ, visited_bbs, blocks_to_reschedule);
        }
+      else if (blocks_to_reschedule)
+        bitmap_set_bit (forced_ebb_heads, succ->index);
     }
 
   for (insn = BB_END (bb); insn != note; insn = PREV_INSN (insn))
@@ -6972,6 +6975,7 @@ reset_sched_cycles_in_current_ebb (void)
   int last_clock = 0;
   int haifa_last_clock = -1;
   int haifa_clock = 0;
+  int issued_insns = 0;
   insn_t insn;
 
   if (targetm.sched.md_init)
@@ -6990,7 +6994,7 @@ reset_sched_cycles_in_current_ebb (void)
     {
       int cost, haifa_cost;
       int sort_p;
-      bool asm_p, real_insn, after_stall;
+      bool asm_p, real_insn, after_stall, all_issued;
       int clock;
 
       if (!INSN_P (insn))
@@ -7026,7 +7030,9 @@ reset_sched_cycles_in_current_ebb (void)
           haifa_cost = cost;
           after_stall = 1;
         }
-
+      all_issued = issued_insns == issue_rate;
+      if (haifa_cost == 0 && all_issued)
+       haifa_cost = 1;
       if (haifa_cost > 0)
        {
          int i = 0;
@@ -7034,6 +7040,7 @@ reset_sched_cycles_in_current_ebb (void)
          while (haifa_cost--)
            {
              advance_state (curr_state);
+             issued_insns = 0;
               i++;
 
              if (sched_verbose >= 2)
@@ -7050,9 +7057,22 @@ reset_sched_cycles_in_current_ebb (void)
                   && haifa_cost > 0
                   && estimate_insn_cost (insn, curr_state) == 0)
                 break;
-           }
+
+              /* When the data dependency stall is longer than the DFA stall,
+                 and when we have issued exactly issue_rate insns and stalled,
+                 it could be that after this longer stall the insn will again
+                 become unavailable due to the DFA restrictions.  Looks strange
+                 but happens e.g. on x86-64.  So recheck DFA on the last
+                 iteration.  */
+              if ((after_stall || all_issued)
+                  && real_insn
+                  && haifa_cost == 0)
+                haifa_cost = estimate_insn_cost (insn, curr_state);
+            }
 
          haifa_clock += i;
+          if (sched_verbose >= 2)
+            sel_print ("haifa clock: %d\n", haifa_clock);
        }
       else
        gcc_assert (haifa_cost == 0);
@@ -7066,21 +7086,27 @@ reset_sched_cycles_in_current_ebb (void)
                                            &sort_p))
          {
            advance_state (curr_state);
+           issued_insns = 0;
            haifa_clock++;
            if (sched_verbose >= 2)
               {
                 sel_print ("advance_state (dfa_new_cycle)\n");
                 debug_state (curr_state);
+               sel_print ("haifa clock: %d\n", haifa_clock + 1);
               }
           }
 
       if (real_insn)
        {
          cost = state_transition (curr_state, insn);
+         issued_insns++;
 
           if (sched_verbose >= 2)
-            debug_state (curr_state);
-
+           {
+             sel_print ("scheduled insn %d, clock %d\n", INSN_UID (insn),
+                        haifa_clock + 1);
+              debug_state (curr_state);
+           }
          gcc_assert (cost < 0);
        }
 
@@ -7493,21 +7519,23 @@ sel_sched_region_1 (void)
             {
               basic_block bb = EBB_FIRST_BB (i);
 
-              if (sel_bb_empty_p (bb))
-                {
-                  bitmap_clear_bit (blocks_to_reschedule, bb->index);
-                  continue;
-                }
-
               if (bitmap_bit_p (blocks_to_reschedule, bb->index))
                 {
+                  if (! bb_ends_ebb_p (bb))
+                    bitmap_set_bit (blocks_to_reschedule, bb_next_bb (bb)->index);
+                  if (sel_bb_empty_p (bb))
+                    {
+                      bitmap_clear_bit (blocks_to_reschedule, bb->index);
+                      continue;
+                    }
                   clear_outdated_rtx_info (bb);
                   if (sel_insn_is_speculation_check (BB_END (bb))
                       && JUMP_P (BB_END (bb)))
                     bitmap_set_bit (blocks_to_reschedule,
                                     BRANCH_EDGE (bb)->dest->index);
                 }
-              else if (INSN_SCHED_TIMES (sel_bb_head (bb)) <= 0)
+              else if (! sel_bb_empty_p (bb)
+                       && INSN_SCHED_TIMES (sel_bb_head (bb)) <= 0)
                 bitmap_set_bit (blocks_to_reschedule, bb->index);
             }
 
index b6eab437fa55ee7ff762566573f1c0e0211f8536..ac7639b0a2d5e082e1f1c9b0479e7bbf4cebdcba 100644 (file)
@@ -1,3 +1,28 @@
+2011-04-07  Andrey Belevantsev  <abel@ispras.ru>
+
+       Backport from mainline
+       2011-01-13  Andrey Belevantsev  <abel@ispras.ru>
+
+       PR rtl-optimization/45352
+       * gcc.dg/pr45352-3.c: New.
+
+       Backport from mainline
+       2010-12-22  Andrey Belevantsev  <abel@ispras.ru>
+
+       PR rtl-optimization/45352
+       PR rtl-optimization/46521
+       PR rtl-optimization/46522
+       * gcc.dg/pr46521.c: New.
+       * gcc.dg/pr46522.c: New.
+
+       Backport from mainline
+       2010-11-08  Andrey Belevantsev  <abel@ispras.ru>
+
+       PR rtl-optimization/45352
+       * gcc.dg/pr45352.c, gcc.dg/pr45352-1.c, gcc.dg/pr45352-2.c: New tests.
+       * gcc.target/i386/pr45352.c, gcc.target/i386/pr45352-1.c,
+       gcc.target/i386/pr45352-2.c: New tests.
+
 2011-04-07  Andrey Belevantsev  <abel@ispras.ru>
 
        Backport from mainline
diff --git a/gcc/testsuite/gcc.dg/pr45352-1.c b/gcc/testsuite/gcc.dg/pr45352-1.c
new file mode 100644 (file)
index 0000000..3b092cd
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */
+/* { dg-options "-O3 -fschedule-insns -fschedule-insns2 -fselective-scheduling2 -fsel-sched-pipelining -funroll-loops -fprefetch-loop-arrays" } */
+
+void main1 (float *pa, float *pc)
+{
+  int i;
+  float b[256];
+  float c[256];
+  for (i = 0; i < 256; i++)
+    b[i] = c[i] = pc[i];
+  for (i = 0; i < 256; i++)
+    pa[i] = b[i] * c[i];
+}
diff --git a/gcc/testsuite/gcc.dg/pr45352-2.c b/gcc/testsuite/gcc.dg/pr45352-2.c
new file mode 100644 (file)
index 0000000..eed3847
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */
+/* { dg-options "-O1 -freorder-blocks -fschedule-insns2 -funswitch-loops -fselective-scheduling2 -fsel-sched-pipelining -funroll-all-loops" } */
+void
+foo1 (int *s)
+{
+  s[0] = s[1];
+  while (s[6] - s[8])
+    {
+      s[6] -= s[8];
+      if (s[8] || s[0])
+       {
+         s[3] += s[0];
+         s[4] += s[1];
+       }
+      s[7]++;
+    }
+}
diff --git a/gcc/testsuite/gcc.dg/pr45352-3.c b/gcc/testsuite/gcc.dg/pr45352-3.c
new file mode 100644 (file)
index 0000000..ce7879f
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */
+/* { dg-options "-O -fprofile-generate -fgcse -fno-gcse-lm -fgcse-sm -fno-ivopts -fno-tree-loop-im -ftree-pre -funroll-loops -fno-web -fschedule-insns2 -fselective-scheduling2 -fsel-sched-pipelining" } */
+
+extern volatile float f[];
+
+void foo (void)
+{
+  int i;
+  for (i = 0; i < 100; i++)
+    f[i] = 0;
+  for (i = 0; i < 100; i++)
+    f[i] = 0;
+  for (i = 0; i < 100; i++)
+    if (f[i])
+      __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.dg/pr45352.c b/gcc/testsuite/gcc.dg/pr45352.c
new file mode 100644 (file)
index 0000000..75f9a21
--- /dev/null
@@ -0,0 +1,24 @@
+/* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */
+/* { dg-options "-Os -fselective-scheduling2 -fsel-sched-pipelining -fprofile-generate" } */
+
+static inline void
+bmp_iter_next (int *bi, int *bit_no)
+{
+  *bi >>= 1;
+  *bit_no += 1;
+}
+
+int bmp_iter_set (int *bi, int *bit_no);
+void bitmap_initialize_stat (int, ...);
+void bitmap_clear (void);
+
+void
+df_md_alloc (int bi, int bb_index, void *bb_info)
+{
+  for (; bmp_iter_set (&bi, &bb_index); bmp_iter_next (&bi, &bb_index))
+
+    if (bb_info)
+      bitmap_clear ();
+    else
+      bitmap_initialize_stat (0);
+}
diff --git a/gcc/testsuite/gcc.dg/pr46521.c b/gcc/testsuite/gcc.dg/pr46521.c
new file mode 100644 (file)
index 0000000..0c41c43
--- /dev/null
@@ -0,0 +1,20 @@
+/* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */
+/* { dg-options "-Os -fselective-scheduling2 -fsel-sched-pipelining -fprofile-generate -fno-early-inlining" } */
+
+static void bmp_iter_next (int *bi)
+{
+  *bi >>= 1;
+}
+
+int bmp_iter_set (int *, int);
+void bitmap_clear (void);
+void bitmap_initialize_stat (void);
+
+void df_md_alloc (int bi, int bb_index, int bb_info)
+{
+  for (; bmp_iter_set (&bi, bb_index); bmp_iter_next (&bi))
+    if (bb_info)
+      bitmap_clear ();
+    else
+      bitmap_initialize_stat ();
+}
diff --git a/gcc/testsuite/gcc.dg/pr46522.c b/gcc/testsuite/gcc.dg/pr46522.c
new file mode 100644 (file)
index 0000000..13a5aa9
--- /dev/null
@@ -0,0 +1,33 @@
+/* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */
+/* { dg-options "-O3 -fkeep-inline-functions -fsel-sched-pipelining -fselective-scheduling2 -funroll-loops" } */
+
+struct S
+{
+  unsigned i, j;
+};
+
+static inline void
+bar (struct S *s)
+{
+  if (s->i++ == 1)
+    {
+      s->i = 0;
+      s->j++;
+    }
+}
+
+void
+foo1 (struct S *s)
+{
+  bar (s);
+}
+
+void
+foo2 (struct S s1, struct S s2, int i)
+{
+  while (s1.i != s2.i) {
+    if (i)
+      *(unsigned *) 0 |= (1U << s1.i);
+    bar (&s1);
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr45352-1.c b/gcc/testsuite/gcc.target/i386/pr45352-1.c
new file mode 100644 (file)
index 0000000..5cd1bd8
--- /dev/null
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-mtune=amdfam10 -O3 -fpeel-loops -fselective-scheduling2 -fsel-sched-pipelining -fPIC" } */
+
+static int FIR_Tab_16[16][16];
+
+void
+V_Pass_Avrg_16_C_ref (int *Dst, int *Src, int W, int BpS, int Rnd)
+{
+  while (W-- > 0)
+    {
+      int i, k;
+      int Sums[16] = { };
+      for (i = 0; i < 16; ++i)
+       for (k = 0; k < 16; ++k)
+         Sums[k] += FIR_Tab_16[i][k] * Src[i];
+      for (i = 0; i < 16; ++i)
+       Dst[i] = Sums[i] + Src[i];
+    }
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr45352-2.c b/gcc/testsuite/gcc.target/i386/pr45352-2.c
new file mode 100644 (file)
index 0000000..5f9ebb1
--- /dev/null
@@ -0,0 +1,109 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -mtune=amdfam10 -fexpensive-optimizations -fgcse -foptimize-register-move -freorder-blocks -fschedule-insns2 -funswitch-loops -fgcse-las -fselective-scheduling2 -fsel-sched-pipelining -funroll-all-loops" } */
+
+typedef char uint8_t;
+typedef uint32_t;
+typedef vo_frame_t;
+struct vo_frame_s
+{
+    uint8_t base[3];
+  int pitches[3];};
+typedef struct
+{
+void
+    (*proc_macro_block)
+    (void);
+}
+xine_xvmc_t;
+typedef struct
+{
+  uint8_t ref[2][3];
+int pmv;
+}
+motion_t;
+typedef struct
+{
+  uint32_t bitstream_buf;
+  int bitstream_bits;
+    uint8_t * bitstream_ptr;
+    uint8_t dest[3];
+  int pitches[3];
+  int offset;
+    motion_t b_motion;
+    motion_t f_motion;
+  int v_offset;
+  int coded_picture_width;
+  int picture_structure;
+struct vo_frame_s *current_frame;}
+picture_t;
+typedef struct
+{
+int xvmc_last_slice_code;}
+mpeg2dec_accel_t;
+static int bitstream_init (picture_t * picture, void *start)
+{
+  picture->bitstream_ptr = start;
+  return (int) (long) start;
+}
+static slice_xvmc_init (picture_t * picture, int code)
+{
+  int offset;
+  struct vo_frame_s *forward_reference_frame;
+  offset = picture->picture_structure == 2;
+  picture->pitches[0] = picture->current_frame->pitches[0];
+  picture->pitches[1] = picture->current_frame->pitches[1];
+  if (picture)
+    picture->f_motion.ref
+      [0]
+      [0]
+      = (char) (long) (forward_reference_frame->base + (offset ? picture->pitches[0] : 0));
+  picture->f_motion.ref[0][1] = (offset);
+  if (picture->picture_structure)
+      picture->pitches[0] <<= picture->pitches[1] <<= 1;
+  offset = 0;
+  while (1)
+    {
+      if (picture->bitstream_buf >= 0x08000000)
+         break;
+      switch (picture->bitstream_buf >> 12)
+       {
+       case 8:
+         offset += 33;
+               picture->bitstream_buf
+                 |=
+                 picture->bitstream_ptr[1] << picture->bitstream_bits;
+       }
+    }
+  picture->offset = (offset);
+  while (picture->offset - picture->coded_picture_width >= 0)
+    {
+      picture->offset -= picture->coded_picture_width;
+      if (picture->current_frame)
+       {
+         picture->dest[0] += picture->pitches[0];
+         picture->dest[1] += picture->pitches[1];
+       }
+      picture->v_offset += 16;
+    }
+}
+
+void
+mpeg2_xvmc_slice
+  (mpeg2dec_accel_t * accel, picture_t * picture, int code, uint8_t buffer,int mba_inc)
+{
+  xine_xvmc_t * xvmc = (xine_xvmc_t *) (long) bitstream_init (picture, (void *) (long) buffer);
+  slice_xvmc_init (picture, code);
+    while (1)
+      {
+       if (picture)
+           break;
+       switch (picture->bitstream_buf)
+         {
+         case 8:
+           mba_inc += accel->xvmc_last_slice_code = code;
+                 xvmc->proc_macro_block   ();
+           while (mba_inc)
+             ;
+         }
+      }
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr45352.c b/gcc/testsuite/gcc.target/i386/pr45352.c
new file mode 100644 (file)
index 0000000..ef710ce
--- /dev/null
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=amdfam10 -fselective-scheduling2 -fsel-sched-pipelining -funroll-all-loops" } */
+
+struct S
+{
+  struct
+  {
+    int i;
+  } **p;
+  int x;
+  int y;
+};
+
+extern int baz (void);
+extern int bar (void *, int, int);
+
+void
+foo (struct S *s)
+{
+  int i;
+  for (i = 0; i < s->x; i++)
+    bar (s->p[i], baz (), s->y);
+  for (i = 0; i < s->x; i++)
+    s->p[i]->i++;
+}