From e767b5be438a67c59f8f23b6214fcbb7183a8092 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Sat, 16 Aug 2003 13:04:59 +0200 Subject: [PATCH] i386.c (ix86_fntype_regparm): Rename from ... * i386.c (ix86_fntype_regparm): Rename from ... (ix86_function_regparm): ... this one; add fastcall and local functions. (ix86_function_ok_for_sibcall): Update. (ix86_return_pops_args): Likewise. (init_cumulative_args): Likewise. (x86_can_output_mi_thunk): Likewise. (function_arg): Fix formating. (x86_this_parameter): Fix fastcall. (x86_output_mi_thunk): Likewise. * cgraph.c (cgraph_mark_needed_node): Do not mark functions without body as reachable; mark nested functions as needed too. (dump_cgraph): Do not output global.calls. * cgraph.h (cgraph_global_info): Kill. * cgraphunit.c (cgraph_finalize_function): Enqueue needed functions. (record_call_1): Speedup. (cgraph_analyze_function): Break out from ...; compute inlining parameters. (cgraph_finalize_compilation_unit): ... here. (cgraph_mark_inline): Kill computation of calls. (cgraph_decide_inlining): Do not compute most of initial values. From-SVN: r70504 --- gcc/ChangeLog | 25 +++++++++ gcc/cgraph.c | 25 +++++---- gcc/cgraph.h | 3 -- gcc/cgraphunit.c | 104 +++++++++++++++++------------------ gcc/config/i386/i386.c | 120 ++++++++++++++++++++++------------------- 5 files changed, 156 insertions(+), 121 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 159dc12c7372..a88550421367 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2003-08-16 Jan Hubicka + + * i386.c (ix86_fntype_regparm): Rename from ... + (ix86_function_regparm): ... this one; add fastcall and local + functions. + (ix86_function_ok_for_sibcall): Update. + (ix86_return_pops_args): Likewise. + (init_cumulative_args): Likewise. + (x86_can_output_mi_thunk): Likewise. + (function_arg): Fix formating. + (x86_this_parameter): Fix fastcall. + (x86_output_mi_thunk): Likewise. 
+ + * cgraph.c (cgraph_mark_needed_node): Do not mark functions without + body as reachable; mark nested functions as needed too. + (dump_cgraph): Do not output global.calls. + * cgraph.h (cgraph_global_info): Kill. + * cgraphunit.c (cgraph_finalize_function): Enqueue needed functions. + (record_call_1): Speedup. + (cgraph_analyze_function): Break out from ...; compute inlining + parameters. + (cgraph_finalize_compilation_unit): ... here. + (cgraph_mark_inline): Kill computation of calls. + (cgraph_decide_inlining): Do not compute most of initial values. + 2003-08-14 Roger Sayle * fold-const.c (negate_expr_p): MULT_EXPRs and RDIV_EXPRs are easy diff --git a/gcc/cgraph.c b/gcc/cgraph.c index fedecbcdba57..9aa12a6102df 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -240,17 +240,24 @@ void cgraph_mark_needed_node (struct cgraph_node *node, int needed) { if (needed) - { - node->needed = 1; - } - if (!node->reachable) + node->needed = 1; + + if (!node->reachable && DECL_SAVED_TREE (node->decl)) { node->reachable = 1; - if (DECL_SAVED_TREE (node->decl)) + + node->next_needed = cgraph_nodes_queue; + cgraph_nodes_queue = node; + + /* At the moment frontend automatically emits all nested functions. 
*/ + if (node->nested) { - node->next_needed = cgraph_nodes_queue; - cgraph_nodes_queue = node; - } + struct cgraph_node *node2; + + for (node2 = node->nested; node2; node2 = node2->next_nested) + if (!node2->reachable) + cgraph_mark_needed_node (node2, 0); + } } } @@ -361,8 +368,6 @@ dump_cgraph (FILE *f) fprintf (f, " %i insns after inlining", node->global.insns); if (node->global.cloned_times > 1) fprintf (f, " cloned %ix", node->global.cloned_times); - if (node->global.calls) - fprintf (f, " %i calls", node->global.calls); fprintf (f, "\n called by :"); for (edge = node->callers; edge; edge = edge->next_caller) diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 849c8ee5486e..f96ac0d0d59a 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -52,9 +52,6 @@ struct cgraph_global_info GTY(()) /* Estimated size of the function after inlining. */ int insns; - /* Number of direct calls not inlined into the function body. */ - int calls; - /* Number of times given function will be cloned during output. */ int cloned_times; diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index b75c717e922d..faf685091001 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -65,6 +65,10 @@ cgraph_finalize_function (tree decl, tree body ATTRIBUTE_UNUSED) node->decl = decl; node->local.finalized = true; + /* Function now has DECL_SAVED_TREE set. Enqueue it into cgraph_nodes_queue + if needed. */ + if (node->needed) + cgraph_mark_needed_node (node, 0); if (/* Externally visible functions must be output. The exception are COMDAT functions that must be output only when they are needed. Similarly are handled deferred functions and @@ -122,6 +126,10 @@ record_call_1 (tree *tp, int *walk_subtrees, void *data) *walk_subtrees = 0; } } + /* Save some cycles by not walking types and declaration as we won't find anything + useful there anyway.
*/ + if (DECL_P (*tp) || TYPE_P (*tp)) + *walk_subtrees = 0; return NULL; } @@ -135,13 +143,46 @@ cgraph_create_edges (tree decl, tree body) walk_tree_without_duplicates (&body, record_call_1, decl); } +/* Analyze the function scheduled to be output. */ +static void +cgraph_analyze_function (struct cgraph_node *node) +{ + tree decl = node->decl; + + if (lang_hooks.callgraph.lower_function) + (*lang_hooks.callgraph.lower_function) (decl); + + current_function_decl = node->decl; + + /* First kill forward declaration so reverse inlining works properly. */ + cgraph_create_edges (decl, DECL_SAVED_TREE (decl)); + + node->local.inlinable = tree_inlinable_function_p (decl); + if (!DECL_ESTIMATED_INSNS (decl)) + DECL_ESTIMATED_INSNS (decl) + = (*lang_hooks.tree_inlining.estimate_num_insns) (decl); + node->local.self_insns = DECL_ESTIMATED_INSNS (decl); + if (node->local.inlinable) + node->local.disregard_inline_limits + = (*lang_hooks.tree_inlining.disregard_inline_limits) (decl); + + /* Inlining characteristics are maintained by the cgraph_mark_inline. */ + node->global.insns = node->local.self_insns; + if (!DECL_EXTERNAL (node->decl)) + { + node->global.cloned_times = 1; + node->global.will_be_output = true; + } + + node->lowered = true; +} + /* Analyze the whole compilation unit once it is parsed completely. */ void cgraph_finalize_compilation_unit (void) { struct cgraph_node *node; - struct cgraph_edge *edge; cgraph_varpool_assemble_pending_decls (); if (!quiet_flag) @@ -163,6 +204,7 @@ cgraph_finalize_compilation_unit (void) method table generation for instance). 
*/ while (cgraph_nodes_queue) { + struct cgraph_edge *edge; tree decl = cgraph_nodes_queue->decl; node = cgraph_nodes_queue; @@ -171,38 +213,12 @@ cgraph_finalize_compilation_unit (void) if (node->lowered || !node->reachable || !DECL_SAVED_TREE (decl)) abort (); - if (lang_hooks.callgraph.lower_function) - (*lang_hooks.callgraph.lower_function) (decl); - - current_function_decl = node->decl; - - /* At the moment frontend automatically emits all nested functions. */ - if (node->nested) - { - struct cgraph_node *node2; - - for (node2 = node->nested; node2; node2 = node2->next_nested) - if (!node2->reachable) - cgraph_mark_needed_node (node2, 0); - } - - /* First kill forward declaration so reverse inlining works properly. */ - cgraph_create_edges (decl, DECL_SAVED_TREE (decl)); - - node->local.inlinable = tree_inlinable_function_p (decl); - DECL_ESTIMATED_INSNS (decl) - = (*lang_hooks.tree_inlining.estimate_num_insns) (decl); - node->local.self_insns = DECL_ESTIMATED_INSNS (decl); - if (node->local.inlinable) - node->local.disregard_inline_limits - = (*lang_hooks.tree_inlining.disregard_inline_limits) (decl); - + cgraph_analyze_function (node); for (edge = node->callees; edge; edge = edge->next_callee) - { - if (!edge->callee->reachable) - cgraph_mark_needed_node (edge->callee, 0); - } - node->lowered = true; + { + if (!edge->callee->reachable) + cgraph_mark_needed_node (edge->callee, 0); + } cgraph_varpool_assemble_pending_decls (); } /* Collect entry points to the unit. 
*/ @@ -214,6 +230,7 @@ cgraph_finalize_compilation_unit (void) if (node->needed && DECL_SAVED_TREE (node->decl)) fprintf (cgraph_dump_file, " %s", cgraph_node_name (node)); fprintf (cgraph_dump_file, "\n"); + dump_cgraph (cgraph_dump_file); } if (cgraph_dump_file) @@ -650,7 +667,6 @@ cgraph_mark_inline (struct cgraph_node *to, struct cgraph_node *what, overall_insns += new_insns - to->global.insns; to->global.insns = new_insns; - to->global.calls += (what->global.calls - 1) *times; if (!called && !what->needed && !what->origin && !DECL_EXTERNAL (what->decl)) { @@ -662,8 +678,6 @@ cgraph_mark_inline (struct cgraph_node *to, struct cgraph_node *what, overall_insns -= what->global.insns; } what->global.cloned_times += clones; - if (to->global.calls < 0) - abort (); for (i = 0; i < ninlined; i++) { new_insns = @@ -672,10 +686,6 @@ cgraph_mark_inline (struct cgraph_node *to, struct cgraph_node *what, if (inlined[i]->global.will_be_output) overall_insns += new_insns - inlined[i]->global.insns; inlined[i]->global.insns = new_insns; - inlined[i]->global.calls += - (what->global.calls - 1) *INLINED_TIMES (inlined[i]) * times; - if (inlined[i]->global.calls < 0) - abort (); } for (i = 0; i < ninlined_callees; i++) { @@ -883,21 +893,7 @@ cgraph_decide_inlining (void) int i, y; for (node = cgraph_nodes; node; node = node->next) - { - int ncalls = 0; - struct cgraph_edge *e; - - node->global.insns = node->local.self_insns; - for (e = node->callees; e; e = e->next_callee) - ncalls++; - node->global.calls = ncalls; - if (!DECL_EXTERNAL (node->decl)) - { - node->global.cloned_times = 1; - initial_insns += node->local.self_insns; - node->global.will_be_output = true; - } - } + initial_insns += node->local.self_insns; overall_insns = initial_insns; nnodes = cgraph_postorder (order); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 7b1a39bf4d62..db404f07b10d 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -866,7 +866,7 @@ static unsigned int 
ix86_select_alt_pic_regnum (void); static int ix86_save_reg (unsigned int, int); static void ix86_compute_frame_layout (struct ix86_frame *); static int ix86_comp_type_attributes (tree, tree); -static int ix86_fntype_regparm (tree); +static int ix86_function_regparm (tree, tree); const struct attribute_spec ix86_attribute_table[]; static bool ix86_function_ok_for_sibcall (tree, tree); static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *); @@ -1532,19 +1532,14 @@ ix86_function_ok_for_sibcall (tree decl, tree exp) such registers are not used for passing parameters. */ if (!decl && !TARGET_64BIT) { - int regparm = ix86_regparm; - tree attr, type; + tree type; /* We're looking at the CALL_EXPR, we need the type of the function. */ type = TREE_OPERAND (exp, 0); /* pointer expression */ type = TREE_TYPE (type); /* pointer type */ type = TREE_TYPE (type); /* function type */ - attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type)); - if (attr) - regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); - - if (regparm >= 3) + if (ix86_function_regparm (type, NULL) >= 3) { /* ??? Need to count the actual number of registers to be used, not the possible number of registers. Fix later. */ @@ -1637,9 +1632,9 @@ ix86_handle_regparm_attribute (tree *node, tree name, tree args, } if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) - { - error ("fastcall and regparm attributes are not compatible"); - } + { + error ("fastcall and regparm attributes are not compatible"); + } } return NULL_TREE; @@ -1670,18 +1665,49 @@ ix86_comp_type_attributes (tree type1, tree type2) return 1; } -/* Return the regparm value for a fuctio with the indicated TYPE. */ +/* Return the regparm value for a function with the indicated TYPE and DECL. + DECL may be NULL when calling function indirectly + or considering a libcall.
*/ static int -ix86_fntype_regparm (tree type) +ix86_function_regparm (tree type, tree decl) { tree attr; + int regparm = ix86_regparm; + bool user_convention = false; - attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type)); - if (attr) - return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); - else - return ix86_regparm; + if (!TARGET_64BIT) + { + attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type)); + if (attr) + { + regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); + user_convention = true; + } + + if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) + { + regparm = 2; + user_convention = true; + } + + /* Use register calling convention for local functions when possible. */ + if (!TARGET_64BIT && !user_convention && decl + && flag_unit_at_a_time) + { + struct cgraph_local_info *i = cgraph_local_info (decl); + if (i && i->local) + { + /* We can't use regparm(3) for nested functions as these use + static chain pointer in third argument. */ + if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl)) + regparm = 2; + else + regparm = 3; + } + } + } + return regparm; } /* Value is the number of bytes of arguments automatically @@ -1725,7 +1751,7 @@ ix86_return_pops_args (tree fundecl, tree funtype, int size) if (aggregate_value_p (TREE_TYPE (funtype)) && !TARGET_64BIT) { - int nregs = ix86_fntype_regparm (funtype); + int nregs = ix86_function_regparm (funtype, fundecl); if (!nregs) return GET_MODE_SIZE (Pmode); @@ -1767,7 +1793,6 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ { static CUMULATIVE_ARGS zero_cum; tree param, next_param; - bool user_convention = false; if (TARGET_DEBUG_ARG) { @@ -1786,18 +1811,11 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ *cum = zero_cum; /* Set up the number of registers to use for passing arguments. 
*/ - cum->nregs = ix86_regparm; + if (fntype) + cum->nregs = ix86_function_regparm (fntype, fndecl); + else + cum->nregs = ix86_regparm; cum->sse_nregs = SSE_REGPARM_MAX; - if (fntype && !TARGET_64BIT) - { - tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype)); - - if (attr) - { - cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); - user_convention = true; - } - } cum->maybe_vaarg = false; /* Use ecx and edx registers if function has fastcall attribute */ @@ -1807,23 +1825,6 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ { cum->nregs = 2; cum->fastcall = 1; - user_convention = true; - } - } - - /* Use register calling convention for local functions when possible. */ - if (!TARGET_64BIT && !user_convention && fndecl - && flag_unit_at_a_time) - { - struct cgraph_local_info *i = cgraph_local_info (fndecl); - if (i && i->local) - { - /* We can't use regparm(3) for nested functions as these use - static chain pointer in third argument. */ - if (DECL_CONTEXT (fndecl) && !DECL_NO_STATIC_CHAIN (fndecl)) - cum->nregs = 2; - else - cum->nregs = 3; } } @@ -2501,7 +2502,7 @@ function_arg (CUMULATIVE_ARGS *cum, /* current arg information */ /* ECX not EAX is the first allocated register. */ if (regno == 0) - regno = 2; + regno = 2; } ret = gen_rtx_REG (mode, regno); } @@ -15087,7 +15088,7 @@ x86_this_parameter (tree function) return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]); } - if (ix86_fntype_regparm (type) > 0) + if (ix86_function_regparm (type, function) > 0) { tree parm; @@ -15097,9 +15098,14 @@ x86_this_parameter (tree function) for (; parm; parm = TREE_CHAIN (parm)) if (TREE_VALUE (parm) == void_type_node) break; - /* If not, the this parameter is in %eax. */ + /* If not, the this parameter is in the first argument. 
*/ if (parm) - return gen_rtx_REG (SImode, 0); + { + int regno = 0 /* EAX */; + if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) + regno = 2 /* ECX */; + return gen_rtx_REG (SImode, regno); + } } if (aggregate_value_p (TREE_TYPE (type))) @@ -15120,7 +15126,7 @@ x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED, return true; /* For 32-bit, everything's fine if we have one free register. */ - if (ix86_fntype_regparm (TREE_TYPE (function)) < 3) + if (ix86_function_regparm (TREE_TYPE (function), function) < 3) return true; /* Need a free register for vcall_offset. */ @@ -15191,7 +15197,13 @@ x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, if (TARGET_64BIT) tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */); else - tmp = gen_rtx_REG (SImode, 2 /* ECX */); + { + int tmp_regno = 2 /* ECX */; + if (lookup_attribute ("fastcall", + TYPE_ATTRIBUTES (TREE_TYPE (function)))) + tmp_regno = 0 /* EAX */; + tmp = gen_rtx_REG (SImode, tmp_regno); + } xops[0] = gen_rtx_MEM (Pmode, this_reg); xops[1] = tmp; -- 2.47.2