re PR tree-optimization/64058 (Performance degradation after r216304)

author Jeff Law <law@redhat.com>

Wed, 23 Mar 2016 13:20:16 +0000 (07:20 -0600)

committer Jeff Law <law@gcc.gnu.org>

Wed, 23 Mar 2016 13:20:16 +0000 (07:20 -0600)
author Jeff Law <law@redhat.com>
Wed, 23 Mar 2016 13:20:16 +0000 (07:20 -0600)
committer Jeff Law <law@gcc.gnu.org>
Wed, 23 Mar 2016 13:20:16 +0000 (07:20 -0600)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 21366dc2061cb061aedc1d356cd04d1d92bf0a90..f9b2af2fa852ff0750722d1b0665c358621ef077 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2016-03-23  Jeff Law  <law@redhat.com>
+
+       PR tree-optimization/64058
+       * tree-ssa-coalesce.c (struct coalesce_pair): Add new field
+       CONFLICT_COUNT.
+       (struct ssa_conflicts): Move up earlier in the file.
+       (conflicts_, var_map_): New static variables.
+       (initialize_conflict_count): New function to initialize the
+       CONFLICT_COUNT field for each conflict pair.
+       (compare_pairs): Lazily initialize the conflict count and use it
+       as the first tie-breaker.
+       (sort_coalesce_list): Add new arguments conflicts, map.  Initialize
+       and wipe conflicts_ and map_ around the call to qsort.  Remove
+       special case for 2 coalesce pairs.
+       * bitmap.c (bitmap_count_unique_bits): New function.
+       (bitmap_count_bits_in_word): New function, extracted from
+       bitmap_count_bits.
+       (bitmap_count_bits): Use bitmap_count_bits_in_word.
+       * bitmap.h (bitmap_count_unique_bits): Declare it.
+
  2016-03-23  Ilya Enkovich  <enkovich.gnu@gmail.com>
  
         PR target/69917
diff --git a/gcc/bitmap.c b/gcc/bitmap.c

index ac20ae5830f4a9d7f422bbc2277fc4b4055e34cf..0c05512b66691b0f40d40c8b683398584643d6f0 100644 (file)
--- a/gcc/bitmap.c
+++ b/gcc/bitmap.c
@@ -662,6 +662,26 @@ bitmap_popcount (BITMAP_WORD a)
    return ret;
  }
  #endif
+
+/* Count and return the number of bits set in the bitmap word BITS.  */
+static unsigned long
+bitmap_count_bits_in_word (const BITMAP_WORD *bits)
+{
+  unsigned long count = 0;
+
+  for (unsigned ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++)
+    {
+#if GCC_VERSION >= 3400
+      /* Note that popcountl matches BITMAP_WORD in type, so the actual size
+        of BITMAP_WORD is not material.  */
+      count += __builtin_popcountl (bits[ix]);
+#else
+      count += bitmap_popcount (bits[ix]);
+#endif
+    }
+  return count;
+}
+
  /* Count the number of bits set in the bitmap, and return it.  */
  
  unsigned long
@@ -669,19 +689,44 @@ bitmap_count_bits (const_bitmap a)
  {
    unsigned long count = 0;
    const bitmap_element *elt;
-  unsigned ix;
  
    for (elt = a->first; elt; elt = elt->next)
+    count += bitmap_count_bits_in_word (elt->bits);
+
+  return count;
+}
+
+/* Count the number of unique bits set in A and B and return it.  */
+
+unsigned long
+bitmap_count_unique_bits (const_bitmap a, const_bitmap b)
+{
+  unsigned long count = 0;
+  const bitmap_element *elt_a, *elt_b;
+
+  for (elt_a = a->first, elt_b = b->first; elt_a && elt_b; )
      {
-      for (ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++)
+      /* If we're at different indices, then count all the bits
+        in the lower element.  If we're at the same index, then
+        count the bits in the IOR of the two elements.  */
+      if (elt_a->indx < elt_b->indx)
         {
-#if GCC_VERSION >= 3400
-         /* Note that popcountl matches BITMAP_WORD in type, so the actual size
-        of BITMAP_WORD is not material.  */
-         count += __builtin_popcountl (elt->bits[ix]);
-#else
-         count += bitmap_popcount (elt->bits[ix]);
-#endif
+         count += bitmap_count_bits_in_word (elt_a->bits);
+         elt_a = elt_a->next;
+       }
+      else if (elt_b->indx < elt_a->indx)
+       {
+         count += bitmap_count_bits_in_word (elt_b->bits);
+         elt_b = elt_b->next;
+       }
+      else
+       {
+         BITMAP_WORD bits[BITMAP_ELEMENT_WORDS];
+         for (unsigned ix = 0; ix != BITMAP_ELEMENT_WORDS; ix++)
+           bits[ix] = elt_a->bits[ix] | elt_b->bits[ix];
+         count += bitmap_count_bits_in_word (bits);
+         elt_a = elt_a->next;
+         elt_b = elt_b->next;
         }
      }
    return count;
diff --git a/gcc/bitmap.h b/gcc/bitmap.h

index 805e37eb47bb4aec230f691a2020fea0811d271b..111571186b76402f09e5326fdd81c4e2f01fa172 100644 (file)
--- a/gcc/bitmap.h
+++ b/gcc/bitmap.h
@@ -280,6 +280,9 @@ extern bool bitmap_single_bit_set_p (const_bitmap);
  /* Count the number of bits set in the bitmap.  */
  extern unsigned long bitmap_count_bits (const_bitmap);
  
+/* Count the number of unique bits set across the two bitmaps.  */
+extern unsigned long bitmap_count_unique_bits (const_bitmap, const_bitmap);
+
  /* Boolean operations on bitmaps.  The _into variants are two operand
     versions that modify the first source operand.  The other variants
     are three operand versions that to not destroy the source bitmaps.
diff --git a/gcc/tree-ssa-coalesce.c b/gcc/tree-ssa-coalesce.c

index e49876eac3349374a3cd9240ad27a5652d783a82..57fc6530e69517426efd8d935e927c6a83bf35a2 100644 (file)
--- a/gcc/tree-ssa-coalesce.c
+++ b/gcc/tree-ssa-coalesce.c
@@ -51,12 +51,40 @@ struct coalesce_pair
    int second_element;
    int cost;
  
+  /* A count of the number of unique partitions this pair would conflict
+     with if coalescing was successful.  This is the secondary sort key,
+     given two pairs with equal costs, we will prefer the pair with a smaller
+     conflict set.
+
+     This is lazily initialized when we discover two coalescing pairs have
+     the same primary cost.
+
+     Note this is not updated and propagated as pairs are coalesced.  */
+  int conflict_count;
+
    /* The order in which coalescing pairs are discovered is recorded in this
       field, which is used as the final tie breaker when sorting coalesce
       pairs.  */
    int index;
  };
  
+/* This represents a conflict graph.  Implemented as an array of bitmaps.
+   A full matrix is used for conflicts rather than just upper triangular form.
+   this make sit much simpler and faster to perform conflict merges.  */
+
+struct ssa_conflicts
+{
+  bitmap_obstack obstack;      /* A place to allocate our bitmaps.  */
+  vec<bitmap> conflicts;
+};
+
+/* The narrow API of the qsort comparison function doesn't allow easy
+   access to additional arguments.  So we have two globals (ick) to hold
+   the data we need.  They're initialized before the call to qsort and
+   wiped immediately after.  */
+static ssa_conflicts *conflicts_;
+static var_map map_;
+
  /* Coalesce pair hashtable helpers.  */
  
  struct coalesce_pair_hasher : nofree_ptr_hash <coalesce_pair>
@@ -303,6 +331,7 @@ find_coalesce_pair (coalesce_list *cl, int p1, int p2, bool create)
        pair->second_element = p.second_element;
        pair->cost = 0;
        pair->index = num_coalesce_pairs (cl);
+      pair->conflict_count = 0;
        *slot = pair;
      }
  
@@ -345,21 +374,66 @@ add_coalesce (coalesce_list *cl, int p1, int p2, int value)
      }
  }
  
+/* Compute and record how many unique conflicts would exist for the
+   representative partition for each coalesce pair in CL.
+
+   CONFLICTS is the conflict graph and MAP is the current partition view.  */
+
+static void
+initialize_conflict_count (coalesce_pair *p,
+                          ssa_conflicts *conflicts,
+                          var_map map)
+{
+  int p1 = var_to_partition (map, ssa_name (p->first_element));
+  int p2 = var_to_partition (map, ssa_name (p->second_element));
+
+  /* 4 cases.  If both P1 and P2 have conflicts, then build their
+     union and count the members.  Else handle the degenerate cases
+     in the obvious ways.  */
+  if (conflicts->conflicts[p1] && conflicts->conflicts[p2])
+    p->conflict_count = bitmap_count_unique_bits (conflicts->conflicts[p1],
+                                                 conflicts->conflicts[p2]);
+  else if (conflicts->conflicts[p1])
+    p->conflict_count = bitmap_count_bits (conflicts->conflicts[p1]);
+  else if (conflicts->conflicts[p2])
+    p->conflict_count = bitmap_count_bits (conflicts->conflicts[p2]);
+  else
+    p->conflict_count = 0;
+}
+
  
  /* Comparison function to allow qsort to sort P1 and P2 in Ascending order.  */
  
  static int
  compare_pairs (const void *p1, const void *p2)
  {
-  const coalesce_pair *const *const pp1 = (const coalesce_pair *const *) p1;
-  const coalesce_pair *const *const pp2 = (const coalesce_pair *const *) p2;
+  coalesce_pair *const *const pp1 = (coalesce_pair *const *) p1;
+  coalesce_pair *const *const pp2 = (coalesce_pair *const *) p2;
    int result;
  
    result = (* pp1)->cost - (* pp2)->cost;
-  /* And if everything else is equal, then sort based on which
-     coalesce pair was found first.  */
+  /* We use the size of the resulting conflict set as the secondary sort key.
+     Given two equal costing coalesce pairs, we want to prefer the pair that
+     has the smaller conflict set.  */
    if (result == 0)
-    result = (*pp2)->index - (*pp1)->index;
+    {
+      if (flag_expensive_optimizations)
+       {
+         /* Lazily initialize the conflict counts as it's fairly expensive
+            to compute.  */
+         if ((*pp2)->conflict_count == 0)
+           initialize_conflict_count (*pp2, conflicts_, map_);
+         if ((*pp1)->conflict_count == 0)
+           initialize_conflict_count (*pp1, conflicts_, map_);
+
+         result = (*pp2)->conflict_count - (*pp1)->conflict_count;
+       }
+
+      /* And if everything else is equal, then sort based on which
+        coalesce pair was found first.  */
+      if (result == 0)
+       result = (*pp2)->index - (*pp1)->index;
+    }
  
    return result;
  }
@@ -374,7 +448,7 @@ compare_pairs (const void *p1, const void *p2)
     in order from most important coalesce to least important.  */
  
  static void
-sort_coalesce_list (coalesce_list *cl)
+sort_coalesce_list (coalesce_list *cl, ssa_conflicts *conflicts, var_map map)
  {
    unsigned x, num;
    coalesce_pair *p;
@@ -400,19 +474,14 @@ sort_coalesce_list (coalesce_list *cl)
    if (num == 1)
      return;
  
-  /* If there are only 2, just pick swap them if the order isn't correct.  */
-  if (num == 2)
-    {
-      if (cl->sorted[0]->cost > cl->sorted[1]->cost)
-       std::swap (cl->sorted[0], cl->sorted[1]);
-      return;
-    }
-
-  /* Only call qsort if there are more than 2 items.
-     ??? Maybe std::sort will do better, provided that compare_pairs
-     can be inlined.  */
-  if (num > 2)
-      qsort (cl->sorted, num, sizeof (coalesce_pair *), compare_pairs);
+  /* We don't want to depend on qsort_r, so we have to stuff away
+     additional data into globals so it can be referenced in
+     compare_pairs.  */
+  conflicts_ = conflicts;
+  map_ = map;
+  qsort (cl->sorted, num, sizeof (coalesce_pair *), compare_pairs);
+  conflicts_ = NULL;
+  map_ = NULL;
  }
  
  
@@ -437,7 +506,7 @@ dump_coalesce_list (FILE *f, coalesce_list *cl)
           print_generic_expr (f, var1, TDF_SLIM);
           fprintf (f, " <-> ");
           print_generic_expr (f, var2, TDF_SLIM);
-         fprintf (f, "  (%1d), ", node->cost);
+         fprintf (f, "  (%1d, %1d), ", node->cost, node->conflict_count);
           fprintf (f, "\n");
         }
      }
@@ -447,7 +516,7 @@ dump_coalesce_list (FILE *f, coalesce_list *cl)
        for (x = cl->num_sorted - 1 ; x >=0; x--)
          {
           node = cl->sorted[x];
-         fprintf (f, "(%d) ", node->cost);
+         fprintf (f, "(%d, %d) ", node->cost, node->conflict_count);
           var = ssa_name (node->first_element);
           print_generic_expr (f, var, TDF_SLIM);
           fprintf (f, " <-> ");
@@ -459,16 +528,6 @@ dump_coalesce_list (FILE *f, coalesce_list *cl)
  }
  
  
-/* This represents a conflict graph.  Implemented as an array of bitmaps.
-   A full matrix is used for conflicts rather than just upper triangular form.
-   this make sit much simpler and faster to perform conflict merges.  */
-
-struct ssa_conflicts
-{
-  bitmap_obstack obstack;      /* A place to allocate our bitmaps.  */
-  vec<bitmap> conflicts;
-};
-
  /* Return an empty new conflict graph for SIZE elements.  */
  
  static inline ssa_conflicts *
@@ -1800,7 +1859,7 @@ coalesce_ssa_name (void)
    if (dump_file && (dump_flags & TDF_DETAILS))
      ssa_conflicts_dump (dump_file, graph);
  
-  sort_coalesce_list (cl);
+  sort_coalesce_list (cl, graph, map);
  
    if (dump_file && (dump_flags & TDF_DETAILS))
      {
author	Jeff Law <law@redhat.com>
	Wed, 23 Mar 2016 13:20:16 +0000 (07:20 -0600)
committer	Jeff Law <law@gcc.gnu.org>
	Wed, 23 Mar 2016 13:20:16 +0000 (07:20 -0600)
gcc/ChangeLog		patch \| blob \| blame \| history
gcc/bitmap.c		patch \| blob \| blame \| history
gcc/bitmap.h		patch \| blob \| blame \| history
gcc/tree-ssa-coalesce.c		patch \| blob \| blame \| history