git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Revised patch to ensure that histograms from the profile summary are streamed...
author: Teresa Johnson <tejohnson@google.com>
Fri, 30 Nov 2012 16:47:04 +0000 (16:47 +0000)
committer: Teresa Johnson <tejohnson@gcc.gnu.org>
Fri, 30 Nov 2012 16:47:04 +0000 (16:47 +0000)
Revised patch to ensure that histograms from the profile summary are streamed
through the LTO files so that the working set can be computed for use in
downstream optimizations.

2012-11-30  Teresa Johnson  <tejohnson@google.com>

* lto-cgraph.c (output_profile_summary): Stream out sum_all
and histogram.
(input_profile_summary): Stream in sum_all and histogram.
(merge_profile_summaries): Merge sum_all and histogram, and
change to use RDIV.
(input_symtab): Call compute_working_sets after merging
summaries.
* gcov-io.c (gcov_histo_index): Make extern for compiler.
* gcov-io.h (gcov_histo_index): Ditto.
* profile.c (compute_working_sets): Remove static keyword.
* profile.h (compute_working_sets): Ditto.
* Makefile.in (lto-cgraph.o): Depend on profile.h.

From-SVN: r193999

gcc/ChangeLog
gcc/Makefile.in
gcc/gcov-io.c
gcc/gcov-io.h
gcc/lto-cgraph.c
gcc/profile.c
gcc/profile.h

index 013ccf5fd3fa947ad8e9fc34d1851232bbf4a58d..6d1822398b855a37a036ddd2bac1a6e30e4e409c 100644 (file)
@@ -1,3 +1,18 @@
+2012-11-30  Teresa Johnson  <tejohnson@google.com>
+
+       * lto-cgraph.c (output_profile_summary): Stream out sum_all
+       and histogram.
+       (input_profile_summary): Stream in sum_all and histogram.
+       (merge_profile_summaries): Merge sum_all and histogram, and
+       change to use RDIV.
+       (input_symtab): Call compute_working_sets after merging
+       summaries.
+       * gcov-io.c (gcov_histo_index): Make extern for compiler.
+       * gcov-io.h (gcov_histo_index): Ditto.
+       * profile.c (compute_working_sets): Remove static keyword.
+       * profile.h (compute_working_sets): Ditto.
+       * Makefile.in (lto-cgraph.o): Depend on profile.h.
+
 2012-11-30  Martin Jambor  <mjambor@suse.cz>
 
        PR middle-end/52890
index abb0648a635d67131b0456c0f7e774f7de18c306..42fb5cf7e107d7f3c3c74aca333913e1fa0a1665 100644 (file)
@@ -2162,7 +2162,7 @@ lto-cgraph.o: lto-cgraph.c $(CONFIG_H) $(SYSTEM_H) coretypes.h   \
    $(HASHTAB_H) langhooks.h $(BASIC_BLOCK_H) \
    $(TREE_FLOW_H) $(CGRAPH_H) $(FUNCTION_H) $(GGC_H) $(DIAGNOSTIC_CORE_H) \
    $(EXCEPT_H) $(TIMEVAR_H) pointer-set.h $(LTO_STREAMER_H) \
-   $(GCOV_IO_H) $(DATA_STREAMER_H) $(TREE_STREAMER_H) $(TREE_PASS_H)
+   $(GCOV_IO_H) $(DATA_STREAMER_H) $(TREE_STREAMER_H) $(TREE_PASS_H) profile.h
 lto-streamer-in.o: lto-streamer-in.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) toplev.h $(DIAGNOSTIC_CORE_H) $(EXPR_H) $(FLAGS_H) $(PARAMS_H) \
    input.h $(HASHTAB_H) $(BASIC_BLOCK_H) $(TREE_FLOW_H) $(TREE_PASS_H) \
index 109401c62b5e79db513b021158859acb40703c10..f45c32cc1e5526ac6e5e2d14e8df8e3cb60d28e8 100644 (file)
@@ -622,11 +622,15 @@ gcov_time (void)
 }
 #endif /* IN_GCOV */
 
-#if IN_LIBGCOV || !IN_GCOV
+#if !IN_GCOV
 /* Determine the index into histogram for VALUE. */
 
+#if IN_LIBGCOV
 static unsigned
-gcov_histo_index(gcov_type value)
+#else
+GCOV_LINKAGE unsigned
+#endif
+gcov_histo_index (gcov_type value)
 {
   gcov_type_unsigned v = (gcov_type_unsigned)value;
   unsigned r = 0;
@@ -664,8 +668,8 @@ gcov_histo_index(gcov_type value)
    its entry's original cumulative counter value when computing the
    new merged cum_value.  */
 
-static void gcov_histogram_merge(gcov_bucket_type *tgt_histo,
-                                 gcov_bucket_type *src_histo)
+static void gcov_histogram_merge (gcov_bucket_type *tgt_histo,
+                                  gcov_bucket_type *src_histo)
 {
   int src_i, tgt_i, tmp_i = 0;
   unsigned src_num, tgt_num, merge_num;
@@ -801,4 +805,4 @@ static void gcov_histogram_merge(gcov_bucket_type *tgt_histo,
   /* Finally, copy the merged histogram into tgt_histo.  */
   memcpy(tgt_histo, tmp_histo, sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE);
 }
-#endif /* IN_LIBGCOV || !IN_GCOV */
+#endif /* !IN_GCOV */
index e1532d79bcd957051db2933ed5fbf1f73f763d67..1f01aacd3e93564fb21b7eefdbc195925b627b67 100644 (file)
@@ -612,6 +612,7 @@ GCOV_LINKAGE void gcov_write_unsigned (gcov_unsigned_t) ATTRIBUTE_HIDDEN;
 
 #if !IN_GCOV && !IN_LIBGCOV
 /* Available only in compiler */
+GCOV_LINKAGE unsigned gcov_histo_index (gcov_type value);
 GCOV_LINKAGE void gcov_write_string (const char *);
 GCOV_LINKAGE gcov_position_t gcov_write_tag (gcov_unsigned_t);
 GCOV_LINKAGE void gcov_write_length (gcov_position_t /*position*/);
index 427362230df29559132f1a317480f2b8a0f38d01..5feaf1abc7ab8601737390c9cf4a9c87c5a9d467 100644 (file)
@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-streamer.h"
 #include "gcov-io.h"
 #include "tree-pass.h"
+#include "profile.h"
 
 static void output_cgraph_opt_summary (void);
 static void input_cgraph_opt_summary (vec<symtab_node>  nodes);
@@ -593,14 +594,39 @@ lto_output_ref (struct lto_simple_output_block *ob, struct ipa_ref *ref,
 static void
 output_profile_summary (struct lto_simple_output_block *ob)
 {
+  unsigned h_ix;
+  struct bitpack_d bp;
+
   if (profile_info)
     {
-      /* We do not output num, sum_all and run_max, they are not used by
-        GCC profile feedback and they are difficult to merge from multiple
-        units.  */
+      /* We do not output num and run_max, they are not used by
+         GCC profile feedback and they are difficult to merge from multiple
+         units.  */
       gcc_assert (profile_info->runs);
       streamer_write_uhwi_stream (ob->main_stream, profile_info->runs);
       streamer_write_uhwi_stream (ob->main_stream, profile_info->sum_max);
+
+      /* sum_all is needed for computing the working set with the
+         histogram.  */
+      streamer_write_uhwi_stream (ob->main_stream, profile_info->sum_all);
+
+      /* Create and output a bitpack of non-zero histogram entries indices.  */
+      bp = bitpack_create (ob->main_stream);
+      for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
+        bp_pack_value (&bp, profile_info->histogram[h_ix].num_counters > 0, 1);
+      streamer_write_bitpack (&bp);
+      /* Now stream out only those non-zero entries.  */
+      for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
+        {
+          if (!profile_info->histogram[h_ix].num_counters)
+            continue;
+          streamer_write_uhwi_stream (ob->main_stream,
+                                      profile_info->histogram[h_ix].num_counters);
+          streamer_write_uhwi_stream (ob->main_stream,
+                                      profile_info->histogram[h_ix].min_value);
+          streamer_write_uhwi_stream (ob->main_stream,
+                                      profile_info->histogram[h_ix].cum_value);
+        }
     }
   else
     streamer_write_uhwi_stream (ob->main_stream, 0);
@@ -1227,11 +1253,38 @@ static void
 input_profile_summary (struct lto_input_block *ib,
                       struct lto_file_decl_data *file_data)
 {
+  unsigned h_ix;
+  struct bitpack_d bp;
   unsigned int runs = streamer_read_uhwi (ib);
   if (runs)
     {
       file_data->profile_info.runs = runs;
       file_data->profile_info.sum_max = streamer_read_uhwi (ib);
+      file_data->profile_info.sum_all = streamer_read_uhwi (ib);
+
+      memset (file_data->profile_info.histogram, 0,
+              sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE);
+      /* Input the bitpack of non-zero histogram indices.  */
+      bp = streamer_read_bitpack (ib);
+      /* Read in and unpack the full bitpack, flagging non-zero
+         histogram entries by setting the num_counters non-zero.  */
+      for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
+        {
+          file_data->profile_info.histogram[h_ix].num_counters
+              = bp_unpack_value (&bp, 1);
+        }
+      for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
+        {
+          if (!file_data->profile_info.histogram[h_ix].num_counters)
+            continue;
+
+          file_data->profile_info.histogram[h_ix].num_counters
+              = streamer_read_uhwi (ib);
+          file_data->profile_info.histogram[h_ix].min_value
+              = streamer_read_uhwi (ib);
+          file_data->profile_info.histogram[h_ix].cum_value
+              = streamer_read_uhwi (ib);
+        }
     }
 
 }
@@ -1242,10 +1295,13 @@ static void
 merge_profile_summaries (struct lto_file_decl_data **file_data_vec)
 {
   struct lto_file_decl_data *file_data;
-  unsigned int j;
+  unsigned int j, h_ix;
   gcov_unsigned_t max_runs = 0;
   struct cgraph_node *node;
   struct cgraph_edge *edge;
+  gcov_type saved_sum_all = 0;
+  gcov_ctr_summary *saved_profile_info = 0;
+  int saved_scale = 0;
 
   /* Find unit with maximal number of runs.  If we ever get serious about
      roundoff errors, we might also consider computing smallest common
@@ -1269,6 +1325,8 @@ merge_profile_summaries (struct lto_file_decl_data **file_data_vec)
   profile_info = &lto_gcov_summary;
   lto_gcov_summary.runs = max_runs;
   lto_gcov_summary.sum_max = 0;
+  memset (lto_gcov_summary.histogram, 0,
+          sizeof (gcov_bucket_type) * GCOV_HISTOGRAM_SIZE);
 
   /* Rescale all units to the maximal number of runs.
      sum_max can not be easily merged, as we have no idea what files come from
@@ -1276,16 +1334,48 @@ merge_profile_summaries (struct lto_file_decl_data **file_data_vec)
   for (j = 0; (file_data = file_data_vec[j]) != NULL; j++)
     if (file_data->profile_info.runs)
       {
-       int scale = ((REG_BR_PROB_BASE * max_runs
-                     + file_data->profile_info.runs / 2)
-                    / file_data->profile_info.runs);
+       int scale = RDIV (REG_BR_PROB_BASE * max_runs,
+                          file_data->profile_info.runs);
        lto_gcov_summary.sum_max = MAX (lto_gcov_summary.sum_max,
-                                       (file_data->profile_info.sum_max
-                                        * scale
-                                        + REG_BR_PROB_BASE / 2)
-                                       / REG_BR_PROB_BASE);
+                                       RDIV (file_data->profile_info.sum_max
+                                              * scale, REG_BR_PROB_BASE));
+       lto_gcov_summary.sum_all = MAX (lto_gcov_summary.sum_all,
+                                       RDIV (file_data->profile_info.sum_all
+                                              * scale, REG_BR_PROB_BASE));
+        /* Save a pointer to the profile_info with the largest
+           scaled sum_all and the scale for use in merging the
+           histogram.  */
+        if (lto_gcov_summary.sum_all > saved_sum_all)
+          {
+            saved_profile_info = &file_data->profile_info;
+            saved_sum_all = lto_gcov_summary.sum_all;
+            saved_scale = scale;
+          }
       }
 
+  gcc_assert (saved_profile_info);
+
+  /* Scale up the histogram from the profile that had the largest
+     scaled sum_all above.  */
+  for (h_ix = 0; h_ix < GCOV_HISTOGRAM_SIZE; h_ix++)
+    {
+      /* Scale up the min value as we did the corresponding sum_all
+         above. Use that to find the new histogram index.  */
+      int scaled_min = RDIV (saved_profile_info->histogram[h_ix].min_value
+                             * saved_scale, REG_BR_PROB_BASE);
+      unsigned new_ix = gcov_histo_index (scaled_min);
+      lto_gcov_summary.histogram[new_ix].min_value = scaled_min;
+      /* Some of the scaled counter values would ostensibly need to be placed
+         into different (larger) histogram buckets, but we keep things simple
+         here and place the scaled cumulative counter value in the bucket
+         corresponding to the scaled minimum counter value.  */
+      lto_gcov_summary.histogram[new_ix].cum_value
+          = RDIV (saved_profile_info->histogram[h_ix].cum_value
+                  * saved_scale, REG_BR_PROB_BASE);
+      lto_gcov_summary.histogram[new_ix].num_counters
+          = saved_profile_info->histogram[h_ix].num_counters;
+    }
+
   /* Watch roundoff errors.  */
   if (lto_gcov_summary.sum_max < max_runs)
     lto_gcov_summary.sum_max = max_runs;
@@ -1303,10 +1393,8 @@ merge_profile_summaries (struct lto_file_decl_data **file_data_vec)
       {
        int scale;
 
-       scale =
-          ((node->count_materialization_scale * max_runs
-            + node->symbol.lto_file_data->profile_info.runs / 2)
-           / node->symbol.lto_file_data->profile_info.runs);
+       scale = RDIV (node->count_materialization_scale * max_runs,
+                      node->symbol.lto_file_data->profile_info.runs);
        node->count_materialization_scale = scale;
        if (scale < 0)
          fatal_error ("Profile information in %s corrupted",
@@ -1315,10 +1403,8 @@ merge_profile_summaries (struct lto_file_decl_data **file_data_vec)
        if (scale == REG_BR_PROB_BASE)
          continue;
        for (edge = node->callees; edge; edge = edge->next_callee)
-         edge->count = ((edge->count * scale + REG_BR_PROB_BASE / 2)
-                        / REG_BR_PROB_BASE);
-       node->count = ((node->count * scale + REG_BR_PROB_BASE / 2)
-                      / REG_BR_PROB_BASE);
+         edge->count = RDIV (edge->count * scale, REG_BR_PROB_BASE);
+       node->count = RDIV (node->count * scale, REG_BR_PROB_BASE);
       }
 }
 
@@ -1365,6 +1451,8 @@ input_symtab (void)
     }
 
   merge_profile_summaries (file_data_vec);
+  compute_working_sets ();
+
 
   /* Clear out the aux field that was used to store enough state to
      tell which nodes should be overwritten.  */
index bbe02b1f00aef758d319edee70c54cdff3e5c794..b50150d6c1e3f6d94d19482f1e30c5c5988154fa 100644 (file)
@@ -207,7 +207,7 @@ instrument_values (histogram_values values)
    the number of counters required to cover that working set percentage and
    the minimum counter value in that working set.  */
 
-static void
+void
 compute_working_sets (void)
 {
   gcov_type working_set_cum_values[NUM_GCOV_WORKING_SETS];
index c97d8efee5442e53a4495a70e3c3edd4ad15ff1c..ed8c88667d80f8d2fe7bf3f603b6d85a161b22dd 100644 (file)
@@ -47,4 +47,6 @@ extern gcov_type sum_edge_counts (vec<edge, va_gc> *edges);
 extern void init_node_map (void);
 extern void del_node_map (void);
 
+extern void compute_working_sets (void);
+
 #endif /* PROFILE_H */