From: Jan Hubicka Date: Sun, 24 Aug 2008 20:09:32 +0000 (+0200) Subject: invoke.texi (-fipa-cp-clone): New option. X-Git-Tag: releases/gcc-4.4.0~2892 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5e45130d9933ee9f3c2a6e7571985e6667a0f8fd;p=thirdparty%2Fgcc.git invoke.texi (-fipa-cp-clone): New option. * doc/invoke.texi (-fipa-cp-clone): New option. (-fipa-cp): Update docs. (--param ipcp-unit-growth):New. * ipa-cp.c: Include fibheap.h, params.h (ipcp_initialize_node_lattices): When not cloning, all externally visible functions are bottom. (ipcp_need_redirect_p): Accept clones. (ipcp_insert_stage): Use cost driven heuristics. (max_count, dead_nodes): New static vars. (ipcp_need_original_clone_p, ipcp_estimate_cloning_cost, ipcp_const_param_count): New functions. * common.opt (ipa-cp-clone): New command line option. * params.def (ipcp-unit-growth): New. * gcc.dg/ipa/ipacost-1.c: New testcase. * gcc.dg/ipa/ipacost-2.c: New testcase. * gcc.dg/ipa/ipa-7.c: Update template. From-SVN: r139543 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 810d8fd64e2a..8aaece176992 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2008-08-24 Jan Hubicka + + * doc/invoke.texi (-fipa-cp-clone): New option. + (-fipa-cp): Update docs. + (--param ipcp-unit-growth):New. + * ipa-cp.c: Include fibheap.h, params.h + (ipcp_initialize_node_lattices): When not cloning, all externally + visible functions are bottom. + (ipcp_need_redirect_p): Accept clones. + (ipcp_insert_stage): Use cost driven heuristics. + (max_count, dead_nodes): New static vars. + (ipcp_need_original_clone_p, ipcp_estimate_cloning_cost, + ipcp_const_param_count): New functions. + * common.opt (ipa-cp-clone): New command line option. + * params.def (ipcp-unit-growth): New. + 2008-08-24 Jan Hubicka * tree-inline.c (tree_function_versioning): Look harder diff --git a/gcc/common.opt b/gcc/common.opt index 9fc5db3e1583..523f71206c7f 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -623,6 +623,10 @@ fipa-cp Common Report Var(flag_ipa_cp) Optimization Perform Interprocedural constant propagation +fipa-cp-clone +Common Report Var(flag_ipa_cp_clone) Optimization +Perform cloning to make Interprocedural constant propagation stronger + fipa-pure-const Common Report Var(flag_ipa_pure_const) Init(0) Optimization Discover pure and const functions diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 3b71fd345ad0..e34802a356e6 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -330,7 +330,7 @@ Objective-C and Objective-C++ Dialects}. -ffunction-sections -fgcse -fgcse-after-reload -fgcse-las -fgcse-lm @gol -fgcse-sm -fif-conversion -fif-conversion2 -findirect-inlining @gol -finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol --finline-small-functions -fipa-cp -fipa-marix-reorg -fipa-pta @gol +-finline-small-functions -fipa-cp -fipa-cp-clone -fipa-marix-reorg -fipa-pta @gol -fipa-pure-const -fipa-reference -fipa-struct-reorg @gol -fipa-type-escape -fivopts -fkeep-inline-functions -fkeep-static-consts @gol -fmerge-all-constants -fmerge-constants -fmodulo-sched @gol @@ -5854,9 +5854,16 @@ Perform interprocedural constant propagation. This optimization analyzes the program to determine when values passed to functions are constants and then optimizes accordingly. This optimization can substantially increase performance -if the application has constants passed to functions, but -because this optimization can create multiple copies of functions, -it may significantly increase code size. +if the application has constants passed to functions. + +@item -fipa-cp-clone +@opindex fipa-cp-clone +Perform function cloning to make interprocedural constant propagation stronger. +When enabled, interprocedural constant propagation will perform function cloning +when externally visible function can be called with constant arguments. +Because this optimization can create multiple copies of functions, +it may significantly increase code size +(see @option{--param ipcp-unit-growth=@var{value}}). @item -fipa-matrix-reorg @opindex fipa-matrix-reorg @@ -6953,6 +6960,11 @@ Specifies maximal overall growth of the compilation unit caused by inlining. The default value is 30 which limits unit growth to 1.3 times the original size. +@item ipcp-unit-growth +Specifies maximal overall growth of the compilation unit caused by +interprocedural constant propagation. The default value is 10 which limits +unit growth to 1.1 times the original size. + @item large-stack-frame The limit specifying large stack frames. While inlining the algorithm is trying to not grow past this limit too much. Default value is 256 bytes. diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index ad43298ae4e9..b0ed074e3a94 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -132,6 +132,8 @@ along with GCC; see the file COPYING3. If not see #include "diagnostic.h" #include "tree-dump.h" #include "tree-inline.h" +#include "fibheap.h" +#include "params.h" /* Get the original node field of ipa_node_params associated with node NODE. */ static inline struct cgraph_node * @@ -375,8 +377,15 @@ ipcp_initialize_node_lattices (struct cgraph_node *node) info->ipcp_lattices = XCNEWVEC (struct ipcp_lattice, ipa_get_param_count (info)); - for (i = 0; i < ipa_get_param_count (info) ; i++) - ipcp_get_ith_lattice (info, i)->type = IPA_TOP; + + /* When cloning is allowed, we can assume that externally visible functions + are not called. We will compensate this by cloning later. */ + if (flag_ipa_cp_clone || !node->needed) + for (i = 0; i < ipa_get_param_count (info) ; i++) + ipcp_get_ith_lattice (info, i)->type = IPA_TOP; + else + for (i = 0; i < ipa_get_param_count (info) ; i++) + ipcp_get_ith_lattice (info, i)->type = IPA_BOTTOM; } /* build INTEGER_CST tree with type TREE_TYPE and value according to LAT. @@ -755,8 +764,14 @@ ipcp_need_redirect_p (struct cgraph_edge *cs) struct ipa_node_params *orig_callee_info; int i, count; struct ipa_jump_func *jump_func; + struct cgraph_node *node = cs->callee, *orig; + + if ((orig = ipcp_get_orig_node (node)) != NULL) + node = orig; + if (ipcp_get_orig_node (cs->caller)) + return false; - orig_callee_info = IPA_NODE_REF (ipcp_get_orig_node (cs->callee)); + orig_callee_info = IPA_NODE_REF (node); count = ipa_get_param_count (orig_callee_info); for (i = 0; i < count; i++) { @@ -852,23 +867,124 @@ ipcp_update_profiling (void) } } +/* Maximal count found in program. */ +static gcov_type max_count; +bitmap dead_nodes; + +/* Return true if original clone needs to be preserved. */ +static bool +ipcp_need_original_clone_p (struct cgraph_node *node) +{ + struct cgraph_edge *e; + + if (node->needed) + return true; + for (e = node->callers; e; e = e->next_caller) + if (!bitmap_bit_p (dead_nodes, e->caller->uid) + && ipcp_need_redirect_p (e)) + return true; + + return false; +} + +/* Estimate cost of cloning NODE. */ +static long +ipcp_estimate_cloning_cost (struct cgraph_node *node) +{ + int freq_sum = 1; + gcov_type count_sum = 1; + struct cgraph_edge *e; + int cost; + + /* When we don't need original clone; we should always propagate. */ + if (!ipcp_need_original_clone_p (node)) + { + if (dump_file) + fprintf (dump_file, "Function %s can be fully propagated\n", + cgraph_node_name (node)); + return 0; + } + + for (e = node->callers; e; e = e->next_caller) + if (!bitmap_bit_p (dead_nodes, e->caller->uid) + && !ipcp_need_redirect_p (e)) + { + count_sum += e->count; + freq_sum += e->frequency + 1; + } + + cost = node->local.inline_summary.self_insns * 1000; + if (max_count) + cost /= count_sum * 1000 / max_count + 1; + else + cost /= freq_sum * 1000 / REG_BR_PROB_BASE + 1; + if (dump_file) + fprintf (dump_file, "Cost of versioning %s is %i, (size: %i, freq: %i)\n", + cgraph_node_name (node), cost, node->local.inline_summary.self_insns, + freq_sum); + return cost + 1; +} + +/* Return number of live constant parameters. */ +static int +ipcp_const_param_count (struct cgraph_node *node) +{ + int const_param = 0; + struct ipa_node_params *info = IPA_NODE_REF (node); + int count = ipa_get_param_count (info); + int i; + + for (i = 0; i < count; i++) + { + struct ipcp_lattice *lat = ipcp_get_ith_lattice (info, i); + tree parm_tree = ipa_get_ith_param (info, i); + if (ipcp_lat_is_insertable (lat) + /* Do not count obviously unused arguments. */ + && (!is_gimple_reg (parm_tree) + || gimple_default_def (DECL_STRUCT_FUNCTION (node->decl), + parm_tree))) + const_param++; + } + return const_param; +} + /* Propagate the constant parameters found by ipcp_iterate_stage() to the function's code. */ static void ipcp_insert_stage (void) { struct cgraph_node *node, *node1 = NULL; - int i, const_param; + int i; VEC (cgraph_edge_p, heap) * redirect_callers; varray_type replace_trees; struct cgraph_edge *cs; int node_callers, count; tree parm_tree; struct ipa_replace_map *replace_param; + fibheap_t heap; + long overall_insns = 0, new_insns = 0; + long max_new_insns; ipa_check_create_node_params (); ipa_check_create_edge_args (); + dead_nodes = BITMAP_ALLOC (NULL); + + for (node = cgraph_nodes; node; node = node->next) + if (node->analyzed) + { + if (node->count > max_count) + max_count = node->count; + overall_insns += node->local.inline_summary.self_insns; + } + + max_new_insns = overall_insns; + if (max_new_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS)) + max_new_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS); + max_new_insns = max_new_insns * PARAM_VALUE (PARAM_IPCP_UNIT_GROWTH) / 100 + 1; + + /* First collect all functions we proved to have constant arguments to heap. */ + heap = fibheap_new (); for (node = cgraph_nodes; node; node = node->next) { struct ipa_node_params *info; @@ -878,32 +994,64 @@ ipcp_insert_stage (void) info = IPA_NODE_REF (node); if (ipa_is_called_with_var_arguments (info)) continue; - const_param = 0; + if (ipcp_const_param_count (node)) + node->aux = fibheap_insert (heap, ipcp_estimate_cloning_cost (node), node); + } + + /* Now clone in priority order until code size growth limits are met or + heap is emptied. */ + while (!fibheap_empty (heap)) + { + struct ipa_node_params *info; + int growth = 0; + + node = (struct cgraph_node *)fibheap_extract_min (heap); + node->aux = NULL; + if (dump_file) + fprintf (dump_file, "considering function %s\n", + cgraph_node_name (node)); + + if (ipcp_need_original_clone_p (node)) + growth = node->local.inline_summary.self_insns; + else + bitmap_set_bit (dead_nodes, node->uid); + + if (new_insns + growth > max_new_insns) + break; + if (growth + && (optimize_size + || (DECL_STRUCT_FUNCTION (node->decl) + ->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED))) + { + if (dump_file) + fprintf (dump_file, "Not versioning, cold code would grow"); + continue; + } + + new_insns += growth; + + info = IPA_NODE_REF (node); count = ipa_get_param_count (info); + + VARRAY_GENERIC_PTR_INIT (replace_trees, ipcp_const_param_count (node), + "replace_trees"); for (i = 0; i < count; i++) { struct ipcp_lattice *lat = ipcp_get_ith_lattice (info, i); - tree parm_tree = ipa_get_ith_param (info, i); - if (ipcp_lat_is_insertable (lat) + parm_tree = ipa_get_ith_param (info, i); + + if (lat->type == IPA_CONST_VALUE /* Do not count obviously unused arguments. */ && (!is_gimple_reg (parm_tree) - || gimple_default_def (DECL_STRUCT_FUNCTION (node->decl), parm_tree))) - const_param++; - } - if (const_param == 0) - continue; - VARRAY_GENERIC_PTR_INIT (replace_trees, const_param, "replace_trees"); - for (i = 0; i < count; i++) - { - struct ipcp_lattice *lat = ipcp_get_ith_lattice (info, i); - if (lat->type == IPA_CONST_VALUE) + || gimple_default_def (DECL_STRUCT_FUNCTION (node->decl), + parm_tree))) { - parm_tree = ipa_get_ith_param (info, i); replace_param = ipcp_create_replace_map (parm_tree, lat); VARRAY_PUSH_GENERIC_PTR (replace_trees, replace_param); } } + /* Compute how many callers node has. */ node_callers = 0; for (cs = node->callers; cs != NULL; cs = cs->next_caller) @@ -911,6 +1059,7 @@ ipcp_insert_stage (void) redirect_callers = VEC_alloc (cgraph_edge_p, heap, node_callers); for (cs = node->callers; cs != NULL; cs = cs->next_caller) VEC_quick_push (cgraph_edge_p, redirect_callers, cs); + /* Redirecting all the callers of the node to the new versioned node. */ node1 = @@ -920,15 +1069,36 @@ ipcp_insert_stage (void) if (node1 == NULL) continue; if (dump_file) - fprintf (dump_file, "versioned function %s\n", - cgraph_node_name (node)); + fprintf (dump_file, "versioned function %s with growth %i, overall %i\n", + cgraph_node_name (node), (int)growth, (int)new_insns); ipcp_init_cloned_node (node, node1); + /* We've possibly introduced direct calls. */ ipcp_update_cloned_node (node1); if (dump_file) dump_function_to_file (node1->decl, dump_file, dump_flags); + + for (cs = node->callees; cs; cs = cs->next_callee) + if (cs->callee->aux) + { + fibheap_delete_node (heap, (fibnode_t) cs->callee->aux); + cs->callee->aux = fibheap_insert (heap, + ipcp_estimate_cloning_cost (cs->callee), + cs->callee); + } + } + + while (!fibheap_empty (heap)) + { + if (dump_file) + fprintf (dump_file, "skipping function %s\n", + cgraph_node_name (node)); + node = (struct cgraph_node *) fibheap_extract_min (heap); + node->aux = NULL; } + fibheap_delete (heap); + BITMAP_FREE (dead_nodes); ipcp_update_callgraph (); ipcp_update_profiling (); } diff --git a/gcc/params.def b/gcc/params.def index d1f8fcec0e76..8dd1cf48eb50 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -193,6 +193,10 @@ DEFPARAM(PARAM_INLINE_UNIT_GROWTH, "inline-unit-growth", "how much can given compilation unit grow because of the inlining (in percent)", 30, 0, 0) +DEFPARAM(PARAM_IPCP_UNIT_GROWTH, + "ipcp-unit-growth", + "how much can given compilation unit grow because of the interprocedural constant propagation (in percent)", + 10, 0, 0) DEFPARAM(PARAM_INLINE_CALL_COST, "inline-call-cost", "expense of call operation relative to ordinary arithmetic operations", diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c0597790972d..7c63b60d1185 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2008-08-24 Jan Hubicka + + * gcc.dg/ipa/ipacost-1.c: New testcase. + * gcc.dg/ipa/ipacost-2.c: New testcase. + * gcc.dg/ipa/ipa-7.c: Update template. + 2008-08-24 Tobias Burnus PR fortran/37201 diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-7.c b/gcc/testsuite/gcc.dg/ipa/ipa-7.c index 60fc9c0b06f9..0005057047db 100644 --- a/gcc/testsuite/gcc.dg/ipa/ipa-7.c +++ b/gcc/testsuite/gcc.dg/ipa/ipa-7.c @@ -25,7 +25,7 @@ int main () /* { dg-final { scan-ipa-dump-times "versioned function" 1 "cp" } } */ -/* { dg-final { scan-ipa-dump-times "propagating const" 1 "cp" } } */ +/* { dg-final { scan-ipa-dump-times "replacing param with const" 1 "cp" } } */ /* { dg-final { cleanup-ipa-dump "cp" } } */