/* Global, SSA-based optimizations using mathematical identities.
- Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
- Free Software Foundation, Inc.
+ Copyright (C) 2005-2014 Free Software Foundation, Inc.
This file is part of GCC.
#include "tm.h"
#include "flags.h"
#include "tree.h"
-#include "tree-flow.h"
+#include "predict.h"
+#include "vec.h"
+#include "hashtab.h"
+#include "hash-set.h"
+#include "machmode.h"
+#include "hard-reg-set.h"
+#include "input.h"
+#include "function.h"
+#include "dominance.h"
+#include "cfg.h"
+#include "basic-block.h"
+#include "tree-ssa-alias.h"
+#include "internal-fn.h"
+#include "gimple-fold.h"
+#include "gimple-expr.h"
+#include "is-a.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "gimplify.h"
+#include "gimplify-me.h"
+#include "stor-layout.h"
+#include "gimple-ssa.h"
+#include "tree-cfg.h"
+#include "tree-phinodes.h"
+#include "ssa-iterators.h"
+#include "stringpool.h"
+#include "tree-ssanames.h"
+#include "expr.h"
+#include "tree-dfa.h"
+#include "tree-ssa.h"
#include "tree-pass.h"
#include "alloc-pool.h"
-#include "basic-block.h"
#include "target.h"
#include "gimple-pretty-print.h"
+#include "builtins.h"
/* FIXME: RTL headers have to be included here for optabs. */
#include "rtl.h" /* Because optabs.h wants enum rtx_code. */
#include "expr.h" /* Because optabs.h wants sepops. */
+#include "insn-codes.h"
#include "optabs.h"
/* This structure represents one basic block that either computes a
static struct
{
- /* Number of hand-written 32-bit bswaps found. */
+ /* Number of hand-written 16-bit nop / bswaps found. */
+ int found_16bit;
+
+ /* Number of hand-written 32-bit nop / bswaps found. */
int found_32bit;
- /* Number of hand-written 64-bit bswaps found. */
+ /* Number of hand-written 64-bit nop / bswaps found. */
int found_64bit;
-} bswap_stats;
+} nop_stats, bswap_stats;
static struct
{
if (!occ)
{
occ = occ_new (bb, NULL);
- insert_bb (occ, ENTRY_BLOCK_PTR, &occ_head);
+ insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
}
occ->bb_has_division = true;
{
/* Make a variable with the replacement and substitute it. */
type = TREE_TYPE (def);
- recip_def = make_rename_temp (type, "reciptmp");
+ recip_def = create_tmp_reg (type, "reciptmp");
new_stmt = gimple_build_assign_with_ops (RDIV_EXPR, recip_def,
build_one_cst (type), def);
occ_head = NULL;
}
-static bool
-gate_cse_reciprocals (void)
-{
- return optimize && flag_reciprocal_math;
-}
-
/* Go through all the floating-point SSA_NAMEs, and call
execute_cse_reciprocals_1 on each of them. */
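+
+/* For example, with -freciprocal-math the pass rewrites
+
+     x = a / d;  y = b / d;
+
+   into
+
+     t = 1.0 / d;  x = a * t;  y = b * t;
+
+   when enough divisions by D appear (a sketch of the intent, not the
+   exact IL produced).  */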
-static unsigned int
-execute_cse_reciprocals (void)
+namespace {
+
+const pass_data pass_data_cse_reciprocals =
+{
+ GIMPLE_PASS, /* type */
+ "recip", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_cse_reciprocals : public gimple_opt_pass
+{
+public:
+ pass_cse_reciprocals (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
+ virtual unsigned int execute (function *);
+
+}; // class pass_cse_reciprocals
+
+unsigned int
+pass_cse_reciprocals::execute (function *fun)
{
basic_block bb;
tree arg;
occ_pool = create_alloc_pool ("dominators for recip",
sizeof (struct occurrence),
- n_basic_blocks / 3 + 1);
+ n_basic_blocks_for_fn (fun) / 3 + 1);
memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
calculate_dominance_info (CDI_DOMINATORS);
calculate_dominance_info (CDI_POST_DOMINATORS);
#ifdef ENABLE_CHECKING
- FOR_EACH_BB (bb)
+ FOR_EACH_BB_FN (bb, fun)
gcc_assert (!bb->aux);
#endif
- for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = DECL_CHAIN (arg))
- if (gimple_default_def (cfun, arg)
- && FLOAT_TYPE_P (TREE_TYPE (arg))
+ for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
+ if (FLOAT_TYPE_P (TREE_TYPE (arg))
&& is_gimple_reg (arg))
- execute_cse_reciprocals_1 (NULL, gimple_default_def (cfun, arg));
+ {
+ tree name = ssa_default_def (fun, arg);
+ if (name)
+ execute_cse_reciprocals_1 (NULL, name);
+ }
- FOR_EACH_BB (bb)
+ FOR_EACH_BB_FN (bb, fun)
{
gimple_stmt_iterator gsi;
gimple phi;
{
phi = gsi_stmt (gsi);
def = PHI_RESULT (phi);
- if (FLOAT_TYPE_P (TREE_TYPE (def))
- && is_gimple_reg (def))
+ if (! virtual_operand_p (def)
+ && FLOAT_TYPE_P (TREE_TYPE (def)))
execute_cse_reciprocals_1 (NULL, def);
}
if (fail)
continue;
- gimple_replace_lhs (stmt1, arg1);
+ gimple_replace_ssa_lhs (stmt1, arg1);
gimple_call_set_fndecl (stmt1, fndecl);
update_stmt (stmt1);
reciprocal_stats.rfuncs_inserted++;
}
}
- statistics_counter_event (cfun, "reciprocal divs inserted",
+ statistics_counter_event (fun, "reciprocal divs inserted",
reciprocal_stats.rdivs_inserted);
- statistics_counter_event (cfun, "reciprocal functions inserted",
+ statistics_counter_event (fun, "reciprocal functions inserted",
reciprocal_stats.rfuncs_inserted);
free_dominance_info (CDI_DOMINATORS);
return 0;
}
-struct gimple_opt_pass pass_cse_reciprocals =
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_cse_reciprocals (gcc::context *ctxt)
{
- {
- GIMPLE_PASS,
- "recip", /* name */
- gate_cse_reciprocals, /* gate */
- execute_cse_reciprocals, /* execute */
- NULL, /* sub */
- NULL, /* next */
- 0, /* static_pass_number */
- TV_NONE, /* tv_id */
- PROP_ssa, /* properties_required */
- 0, /* properties_provided */
- 0, /* properties_destroyed */
- 0, /* todo_flags_start */
- TODO_update_ssa | TODO_verify_ssa
- | TODO_verify_stmts /* todo_flags_finish */
- }
-};
+ return new pass_cse_reciprocals (ctxt);
+}
/* Records an occurrence at statement USE_STMT in the vector of trees
STMTS if it is dominated by *TOP_BB or dominates it or this basic block
statements in the vector. */
static bool
-maybe_record_sincos (VEC(gimple, heap) **stmts,
+maybe_record_sincos (vec<gimple> *stmts,
basic_block *top_bb, gimple use_stmt)
{
basic_block use_bb = gimple_bb (use_stmt);
if (*top_bb
&& (*top_bb == use_bb
|| dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
- VEC_safe_push (gimple, heap, *stmts, use_stmt);
+ stmts->safe_push (use_stmt);
else if (!*top_bb
|| dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
{
- VEC_safe_push (gimple, heap, *stmts, use_stmt);
+ stmts->safe_push (use_stmt);
*top_bb = use_bb;
}
else
tree fndecl, res, type;
gimple def_stmt, use_stmt, stmt;
int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
- VEC(gimple, heap) *stmts = NULL;
+ vec<gimple> stmts = vNULL;
basic_block top_bb = NULL;
int i;
bool cfg_changed = false;
if (seen_cos + seen_sin + seen_cexpi <= 1)
{
- VEC_free(gimple, heap, stmts);
+ stmts.release ();
return false;
}
fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
if (!fndecl)
return false;
- res = create_tmp_reg (TREE_TYPE (TREE_TYPE (fndecl)), "sincostmp");
stmt = gimple_build_call (fndecl, 1, name);
- res = make_ssa_name (res, stmt);
+ res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
gimple_call_set_lhs (stmt, res);
def_stmt = SSA_NAME_DEF_STMT (name);
gsi = gsi_after_labels (top_bb);
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}
- update_stmt (stmt);
sincos_stats.inserted++;
/* And adjust the recorded old call sites. */
- for (i = 0; VEC_iterate(gimple, stmts, i, use_stmt); ++i)
+ for (i = 0; stmts.iterate (i, &use_stmt); ++i)
{
tree rhs = NULL;
fndecl = gimple_call_fndecl (use_stmt);
cfg_changed = true;
}
- VEC_free(gimple, heap, stmts);
+ stmts.release ();
return cfg_changed;
}
static tree
powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
- HOST_WIDE_INT n, tree *cache, tree target)
+ HOST_WIDE_INT n, tree *cache)
{
tree op0, op1, ssa_target;
unsigned HOST_WIDE_INT digit;
if (n < POWI_TABLE_SIZE && cache[n])
return cache[n];
- ssa_target = make_ssa_name (target, NULL);
+ ssa_target = make_temp_ssa_name (type, NULL, "powmult");
if (n < POWI_TABLE_SIZE)
{
cache[n] = ssa_target;
- op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache, target);
- op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache, target);
+ op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
+ op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
}
else if (n & 1)
{
digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
- op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache, target);
- op1 = powi_as_mults_1 (gsi, loc, type, digit, cache, target);
+ op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
+ op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
}
else
{
- op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache, target);
+ op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
op1 = op0;
}
powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
tree arg0, HOST_WIDE_INT n)
{
- tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0), target;
+ tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
gimple div_stmt;
+ tree target;
if (n == 0)
return build_real (type, dconst1);
memset (cache, 0, sizeof (cache));
cache[1] = arg0;
- target = create_tmp_reg (type, "powmult");
- add_referenced_var (target);
-
- result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache, target);
-
+ result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
if (n >= 0)
return result;
/* If the original exponent was negative, reciprocate the result. */
- target = make_ssa_name (target, NULL);
+ target = make_temp_ssa_name (type, NULL, "powmult");
div_stmt = gimple_build_assign_with_ops (RDIV_EXPR, target,
build_real (type, dconst1),
result);
}
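+
+/* For instance, powi_as_mults expands powi (x, 5) into three multiplies,
+   e.g.
+
+     t1 = x * x;  t2 = t1 * t1;  t3 = t2 * x;
+
+   and powi (x, -5) additionally reciprocates the result: 1.0 / t3
+   (illustrative; the exact factorization follows powi_table).  */
+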
/* Build a gimple call statement that calls FN with argument ARG.
- Set the lhs of the call statement to a fresh SSA name for
- variable VAR. If VAR is NULL, first allocate it. Insert the
+ Set the lhs of the call statement to a fresh SSA name. Insert the
statement prior to GSI's current position, and return the fresh
SSA name. */
static tree
build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
- tree *var, tree fn, tree arg)
+ tree fn, tree arg)
{
gimple call_stmt;
tree ssa_target;
- if (!*var)
- {
- *var = create_tmp_reg (TREE_TYPE (arg), "powroot");
- add_referenced_var (*var);
- }
-
call_stmt = gimple_build_call (fn, 1, arg);
- ssa_target = make_ssa_name (*var, NULL);
+ ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
gimple_set_lhs (call_stmt, ssa_target);
gimple_set_location (call_stmt, loc);
gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
static tree
build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
- tree target, enum tree_code code, tree arg0, tree arg1)
+ const char *name, enum tree_code code,
+ tree arg0, tree arg1)
{
- tree result = make_ssa_name (target, NULL);
+ tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
gimple stmt = gimple_build_assign_with_ops (code, result, arg0, arg1);
gimple_set_location (stmt, loc);
gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
}
/* Build a gimple reference operation with the given CODE and argument
- ARG, assigning the result to a new SSA name for variable TARGET.
+ ARG, assigning the result to a new SSA name of TYPE with NAME.
Insert the statement prior to GSI's current position, and return
the fresh SSA name. */
static inline tree
build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
- tree target, enum tree_code code, tree arg0)
+ const char *name, enum tree_code code, tree arg0)
{
- tree result = make_ssa_name (target, NULL);
+ tree result = make_temp_ssa_name (type, NULL, name);
gimple stmt = gimple_build_assign (result, build1 (code, type, arg0));
gimple_set_location (stmt, loc);
gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
return result;
}
-/* Build a gimple assignment to cast VAL to TARGET. Insert the statement
+/* Build a gimple assignment to cast VAL to TYPE. Insert the statement
prior to GSI's current position, and return the fresh SSA name. */
static tree
build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
- tree target, tree val)
+ tree type, tree val)
{
- return build_and_insert_binop (gsi, loc, target, CONVERT_EXPR, val, NULL);
+ tree result = make_ssa_name (type, NULL);
+ gimple stmt = gimple_build_assign_with_ops (NOP_EXPR, result, val, NULL_TREE);
+ gimple_set_location (stmt, loc);
+ gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+ return result;
}
/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
REAL_VALUE_TYPE c2, dconst3;
HOST_WIDE_INT n;
tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, result, cbrt_x, powi_cbrt_x;
- tree target = NULL_TREE;
- enum machine_mode mode;
- bool hw_sqrt_exists;
+ machine_mode mode;
+ bool hw_sqrt_exists, c_is_int, c2_is_int;
/* If the exponent isn't a constant, there's nothing of interest
to be done. */
multiplication sequence when profitable. */
c = TREE_REAL_CST (arg1);
n = real_to_integer (&c);
- real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
+ real_from_integer (&cint, VOIDmode, n, SIGNED);
+ c_is_int = real_identical (&c, &cint);
- if (real_identical (&c, &cint)
+ if (c_is_int
&& ((n >= -1 && n <= 2)
|| (flag_unsafe_math_optimizations
- && optimize_insn_for_speed_p ()
+ && optimize_bb_for_speed_p (gsi_bb (*gsi))
&& powi_cost (n) <= POWI_MAX_MULTS)))
return gimple_expand_builtin_powi (gsi, loc, arg0, n);
if (sqrtfn
&& REAL_VALUES_EQUAL (c, dconsthalf)
&& !HONOR_SIGNED_ZEROS (mode))
- return build_and_insert_call (gsi, loc, &target, sqrtfn, arg0);
+ return build_and_insert_call (gsi, loc, sqrtfn, arg0);
/* Optimize pow(x,0.25) = sqrt(sqrt(x)). Assume on most machines that
a builtin sqrt instruction is smaller than a call to pow with 0.25,
&& hw_sqrt_exists)
{
/* sqrt(x) */
- sqrt_arg0 = build_and_insert_call (gsi, loc, &target, sqrtfn, arg0);
+ sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
/* sqrt(sqrt(x)) */
- return build_and_insert_call (gsi, loc, &target, sqrtfn, sqrt_arg0);
+ return build_and_insert_call (gsi, loc, sqrtfn, sqrt_arg0);
}
/* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)) unless we are
optimizing for space. Don't do this optimization if we don't have
a hardware sqrt insn. */
- real_from_integer (&dconst3_4, VOIDmode, 3, 0, 0);
+ real_from_integer (&dconst3_4, VOIDmode, 3, SIGNED);
SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2);
if (flag_unsafe_math_optimizations
&& hw_sqrt_exists)
{
/* sqrt(x) */
- sqrt_arg0 = build_and_insert_call (gsi, loc, &target, sqrtfn, arg0);
+ sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
/* sqrt(sqrt(x)) */
- sqrt_sqrt = build_and_insert_call (gsi, loc, &target, sqrtfn, sqrt_arg0);
+ sqrt_sqrt = build_and_insert_call (gsi, loc, sqrtfn, sqrt_arg0);
/* sqrt(x) * sqrt(sqrt(x)) */
- return build_and_insert_binop (gsi, loc, target, MULT_EXPR,
+ return build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
sqrt_arg0, sqrt_sqrt);
}
&& cbrtfn
&& (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
&& REAL_VALUES_EQUAL (c, dconst1_3))
- return build_and_insert_call (gsi, loc, &target, cbrtfn, arg0);
+ return build_and_insert_call (gsi, loc, cbrtfn, arg0);
/* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
if we don't have a hardware sqrt insn. */
&& REAL_VALUES_EQUAL (c, dconst1_6))
{
/* sqrt(x) */
- sqrt_arg0 = build_and_insert_call (gsi, loc, &target, sqrtfn, arg0);
+ sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
/* cbrt(sqrt(x)) */
- return build_and_insert_call (gsi, loc, &target, cbrtfn, sqrt_arg0);
+ return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
}
- /* Optimize pow(x,c), where n = 2c for some nonzero integer n, into
+ /* Optimize pow(x,c), where n = 2c for some nonzero integer n
+ and c is not an integer, into
sqrt(x) * powi(x, n/2), n > 0;
1.0 / (sqrt(x) * powi(x, abs(n/2))), n < 0.
Do not calculate the powi factor when n/2 = 0. */
real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
n = real_to_integer (&c2);
- real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
+ real_from_integer (&cint, VOIDmode, n, SIGNED);
+ c2_is_int = real_identical (&c2, &cint);
if (flag_unsafe_math_optimizations
&& sqrtfn
- && real_identical (&c2, &cint))
+ && c2_is_int
+ && !c_is_int
+ && optimize_function_for_speed_p (cfun))
{
tree powi_x_ndiv2 = NULL_TREE;
/* Calculate sqrt(x). When n is not 1 or -1, multiply it by the
result of the optimal multiply sequence just calculated. */
- sqrt_arg0 = build_and_insert_call (gsi, loc, &target, sqrtfn, arg0);
+ sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
if (absu_hwi (n) == 1)
result = sqrt_arg0;
else
- result = build_and_insert_binop (gsi, loc, target, MULT_EXPR,
+ result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
sqrt_arg0, powi_x_ndiv2);
/* If n is negative, reciprocate the result. */
if (n < 0)
- result = build_and_insert_binop (gsi, loc, target, RDIV_EXPR,
+ result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
build_real (type, dconst1), result);
return result;
}
different from pow(x, 1./3.) due to rounding and behavior with
negative x, we need to constrain this transformation to unsafe
math and positive x or finite math. */
- real_from_integer (&dconst3, VOIDmode, 3, 0, 0);
+ real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
real_round (&c2, mode, &c2);
n = real_to_integer (&c2);
- real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
+ real_from_integer (&cint, VOIDmode, n, SIGNED);
real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
real_convert (&c2, mode, &c2);
&& cbrtfn
&& (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
&& real_identical (&c2, &c)
+ && !c2_is_int
&& optimize_function_for_speed_p (cfun)
&& powi_cost (n / 3) <= POWI_MAX_MULTS)
{
/* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
as that creates an unnecessary variable. Instead, just produce
either cbrt(x) or cbrt(x) * cbrt(x). */
- cbrt_x = build_and_insert_call (gsi, loc, &target, cbrtfn, arg0);
+ cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
if (absu_hwi (n) % 3 == 1)
powi_cbrt_x = cbrt_x;
else
- powi_cbrt_x = build_and_insert_binop (gsi, loc, target, MULT_EXPR,
+ powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
cbrt_x, cbrt_x);
/* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
if (absu_hwi (n) < 3)
result = powi_cbrt_x;
else
- result = build_and_insert_binop (gsi, loc, target, MULT_EXPR,
+ result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
powi_x_ndiv3, powi_cbrt_x);
/* If n is negative, reciprocate the result. */
if (n < 0)
- result = build_and_insert_binop (gsi, loc, target, RDIV_EXPR,
+ result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
build_real (type, dconst1), result);
return result;
static tree
gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
{
- tree target, real_part, imag_part, addend1, addend2, sum, result;
+ tree real_part, imag_part, addend1, addend2, sum, result;
tree type = TREE_TYPE (TREE_TYPE (arg));
tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
- enum machine_mode mode = TYPE_MODE (type);
+ machine_mode mode = TYPE_MODE (type);
if (!flag_unsafe_math_optimizations
|| !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
|| optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
return NULL_TREE;
- target = create_tmp_reg (type, "cabs");
- add_referenced_var (target);
-
- real_part = build_and_insert_ref (gsi, loc, type, target,
+ real_part = build_and_insert_ref (gsi, loc, type, "cabs",
REALPART_EXPR, arg);
- addend1 = build_and_insert_binop (gsi, loc, target, MULT_EXPR,
+ addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
real_part, real_part);
- imag_part = build_and_insert_ref (gsi, loc, type, target,
+ imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
IMAGPART_EXPR, arg);
- addend2 = build_and_insert_binop (gsi, loc, target, MULT_EXPR,
+ addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
imag_part, imag_part);
- sum = build_and_insert_binop (gsi, loc, target, PLUS_EXPR, addend1, addend2);
- result = build_and_insert_call (gsi, loc, &target, sqrtfn, sum);
+ sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
+ result = build_and_insert_call (gsi, loc, sqrtfn, sum);
return result;
}
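+
+/* I.e. under -funsafe-math-optimizations, cabs (z) expands inline to
+   sqrt (re*re + im*im), where re and im are the real and imaginary
+   parts of Z.  */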
on the SSA_NAME argument of each of them. Also expand powi(x,n) into
an optimal number of multiplies, when n is a constant. */
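+
+/* For example, when both values are live,
+
+     s = sinf (x);  c = cosf (x);
+
+   is rewritten (roughly) as
+
+     t = __builtin_cexpif (x);
+     s = IMAGPART_EXPR <t>;  c = REALPART_EXPR <t>;
+
+   so a single call computes both results (a sketch; the builtin chosen
+   depends on the argument type).  */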
-static unsigned int
-execute_cse_sincos (void)
+namespace {
+
+const pass_data pass_data_cse_sincos =
+{
+ GIMPLE_PASS, /* type */
+ "sincos", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_cse_sincos : public gimple_opt_pass
+{
+public:
+ pass_cse_sincos (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_cse_sincos, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *)
+ {
+ /* We no longer require either sincos or cexp, since powi expansion
+ piggybacks on this pass. */
+ return optimize;
+ }
+
+ virtual unsigned int execute (function *);
+
+}; // class pass_cse_sincos
+
+unsigned int
+pass_cse_sincos::execute (function *fun)
{
basic_block bb;
bool cfg_changed = false;
calculate_dominance_info (CDI_DOMINATORS);
memset (&sincos_stats, 0, sizeof (sincos_stats));
- FOR_EACH_BB (bb)
+ FOR_EACH_BB_FN (bb, fun)
{
gimple_stmt_iterator gsi;
+ bool cleanup_eh = false;
for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
gimple stmt = gsi_stmt (gsi);
tree fndecl;
+ /* Only the last stmt in a bb can throw, so there is no need to
+ call gimple_purge_dead_eh_edges if we change something in the
+ middle of a basic block. */
+ cleanup_eh = false;
+
if (is_gimple_call (stmt)
&& gimple_call_lhs (stmt)
&& (fndecl = gimple_call_fndecl (stmt))
CASE_FLT_FN (BUILT_IN_SIN):
CASE_FLT_FN (BUILT_IN_CEXPI):
/* Make sure we have either sincos or cexp. */
- if (!TARGET_HAS_SINCOS && !TARGET_C99_FUNCTIONS)
+ if (!targetm.libc_has_function (function_c99_math_complex)
+ && !targetm.libc_has_function (function_sincos))
break;
arg = gimple_call_arg (stmt, 0);
gimple_set_location (new_stmt, loc);
unlink_stmt_vdef (stmt);
gsi_replace (&gsi, new_stmt, true);
+ cleanup_eh = true;
if (gimple_vdef (stmt))
release_ssa_name (gimple_vdef (stmt));
}
CASE_FLT_FN (BUILT_IN_POWI):
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);
- if (!host_integerp (arg1, 0))
- break;
-
- n = TREE_INT_CST_LOW (arg1);
loc = gimple_location (stmt);
- result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
+
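+ /* powi (-1, n) is 1 for even n and -1 for odd n, so it can be
+ expanded inline as a parity test on n instead of being kept as
+ a call. */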
+ if (real_minus_onep (arg0))
+ {
+ tree t0, t1, cond, one, minus_one;
+ gimple stmt;
+
+ t0 = TREE_TYPE (arg0);
+ t1 = TREE_TYPE (arg1);
+ one = build_real (t0, dconst1);
+ minus_one = build_real (t0, dconstm1);
+
+ cond = make_temp_ssa_name (t1, NULL, "powi_cond");
+ stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, cond,
+ arg1,
+ build_int_cst (t1,
+ 1));
+ gimple_set_location (stmt, loc);
+ gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+
+ result = make_temp_ssa_name (t0, NULL, "powi");
+ stmt = gimple_build_assign_with_ops (COND_EXPR, result,
+ cond,
+ minus_one, one);
+ gimple_set_location (stmt, loc);
+ gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+ }
+ else
+ {
+ if (!tree_fits_shwi_p (arg1))
+ break;
+
+ n = tree_to_shwi (arg1);
+ result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
+ }
if (result)
{
gimple_set_location (new_stmt, loc);
unlink_stmt_vdef (stmt);
gsi_replace (&gsi, new_stmt, true);
+ cleanup_eh = true;
if (gimple_vdef (stmt))
release_ssa_name (gimple_vdef (stmt));
}
gimple_set_location (new_stmt, loc);
unlink_stmt_vdef (stmt);
gsi_replace (&gsi, new_stmt, true);
+ cleanup_eh = true;
if (gimple_vdef (stmt))
release_ssa_name (gimple_vdef (stmt));
}
}
}
}
+ if (cleanup_eh)
+ cfg_changed |= gimple_purge_dead_eh_edges (bb);
}
- statistics_counter_event (cfun, "sincos statements inserted",
+ statistics_counter_event (fun, "sincos statements inserted",
sincos_stats.inserted);
free_dominance_info (CDI_DOMINATORS);
return cfg_changed ? TODO_cleanup_cfg : 0;
}
-static bool
-gate_cse_sincos (void)
-{
- /* We no longer require either sincos or cexp, since powi expansion
- piggybacks on this pass. */
- return optimize;
-}
+} // anon namespace
-struct gimple_opt_pass pass_cse_sincos =
+gimple_opt_pass *
+make_pass_cse_sincos (gcc::context *ctxt)
{
- {
- GIMPLE_PASS,
- "sincos", /* name */
- gate_cse_sincos, /* gate */
- execute_cse_sincos, /* execute */
- NULL, /* sub */
- NULL, /* next */
- 0, /* static_pass_number */
- TV_NONE, /* tv_id */
- PROP_ssa, /* properties_required */
- 0, /* properties_provided */
- 0, /* properties_destroyed */
- 0, /* todo_flags_start */
- TODO_update_ssa | TODO_verify_ssa
- | TODO_verify_stmts /* todo_flags_finish */
- }
-};
+ return new pass_cse_sincos (ctxt);
+}
/* A symbolic number is used to detect byte permutation and selection
patterns. Therefore the field N contains an artificial number
- consisting of byte size markers:
+ consisting of octet-sized markers:
+
+ 0 - target byte has the value 0
+ FF - target byte has an unknown value (e.g. due to sign extension)
+ 1..size - marker value is the target byte index minus one.
- 0 - byte has the value 0
- 1..size - byte contains the content of the byte
- number indexed with that value minus one */
+ To detect permutations on memory sources (arrays and structures), a symbolic
+ number is also associated with a base address (the array or structure the
+ load is made from), an offset from that base address, and a range giving the
+ difference between the highest and lowest accessed memory locations used to
+ build the symbolic number. The range is thus different from size, which
+ reflects the size of the type of the current expression. Note that for a
+ non-memory source, range holds the same value as size.
+
+ For instance, for an array char a[], (short) a[0] | (short) a[3] would have
+ a size of 2 but a range of 4 while (short) a[0] | ((short) a[0] << 1) would
+ still have a size of 2 but this time a range of 1. */
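+
+/* For example, an expression that merely reassembles the four bytes of a
+ 32-bit value in their original order keeps the markers 0x04030201
+ (matching CMPNOP below), while a full byte swap permutes them into
+ 0x01020304 (matching CMPXCHG). A 0xFF marker records a byte whose value
+ has become unknown, e.g. through sign extension. */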
struct symbolic_number {
- unsigned HOST_WIDEST_INT n;
- int size;
+ uint64_t n;
+ tree type;
+ tree base_addr;
+ tree offset;
+ HOST_WIDE_INT bytepos;
+ tree alias_set;
+ tree vuse;
+ unsigned HOST_WIDE_INT range;
};
+#define BITS_PER_MARKER 8
+#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
+#define MARKER_BYTE_UNKNOWN MARKER_MASK
+#define HEAD_MARKER(n, size) \
+ ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
+
+/* The number which the find_bswap_or_nop_1 result should match in
+ order to have a nop. The number is masked according to the size of
+ the symbolic number before using it. */
+#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
+ (uint64_t)0x08070605 << 32 | 0x04030201)
+
+/* The number which the find_bswap_or_nop_1 result should match in
+ order to have a byte swap. The number is masked according to the
+ size of the symbolic number before using it. */
+#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
+ (uint64_t)0x01020304 << 32 | 0x05060708)
+
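+/* For example, for a 2-byte symbolic number the masking keeps 16 marker
+ bits: CMPNOP masked is 0x0201 while CMPXCHG shifted down is 0x0102. */
+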
/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
number N. Return false if the requested operation is not permitted
on a symbolic number. */
struct symbolic_number *n,
int count)
{
- if (count % 8 != 0)
+ int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+ unsigned head_marker;
+
+ if (count % BITS_PER_UNIT != 0)
return false;
+ count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
/* Zero out the extra bits of N in order to avoid them being shifted
into the significant bits. */
- if (n->size < (int)sizeof (HOST_WIDEST_INT))
- n->n &= ((unsigned HOST_WIDEST_INT)1 << (n->size * BITS_PER_UNIT)) - 1;
+ if (size < 64 / BITS_PER_MARKER)
+ n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
switch (code)
{
n->n <<= count;
break;
case RSHIFT_EXPR:
+ head_marker = HEAD_MARKER (n->n, size);
n->n >>= count;
+ /* Arithmetic shift of signed type: result is dependent on the value. */
+ if (!TYPE_UNSIGNED (n->type) && head_marker)
+ for (i = 0; i < count / BITS_PER_MARKER; i++)
+ n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
+ << ((size - 1 - i) * BITS_PER_MARKER);
break;
case LROTATE_EXPR:
- n->n = (n->n << count) | (n->n >> ((n->size * BITS_PER_UNIT) - count));
+ n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
break;
case RROTATE_EXPR:
- n->n = (n->n >> count) | (n->n << ((n->size * BITS_PER_UNIT) - count));
+ n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
break;
default:
return false;
}
/* Zero unused bits for size. */
- if (n->size < (int)sizeof (HOST_WIDEST_INT))
- n->n &= ((unsigned HOST_WIDEST_INT)1 << (n->size * BITS_PER_UNIT)) - 1;
+ if (size < 64 / BITS_PER_MARKER)
+ n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
return true;
}
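+
+/* For instance, in do_shift_rotate an arithmetic right shift by 8 bits of
+ a signed 4-byte symbolic number 0x04030201 yields 0xFF040302: the vacated
+ high byte is sign-filled and therefore unknown (MARKER_BYTE_UNKNOWN). */
+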
if (TREE_CODE (lhs_type) != INTEGER_TYPE)
return false;
- if (TYPE_PRECISION (lhs_type) != n->size * BITS_PER_UNIT)
+ if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
return false;
return true;
}
-/* find_bswap_1 invokes itself recursively with N and tries to perform
- the operation given by the rhs of STMT on the result. If the
- operation could successfully be executed the function returns the
- tree expression of the source operand and NULL otherwise. */
+/* Initialize the symbolic number N for the bswap pass from the base element
+ SRC manipulated by the bitwise OR expression. */
-static tree
-find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
+static bool
+init_symbolic_number (struct symbolic_number *n, tree src)
+{
+ int size;
+
+ n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
+
+ /* Set up the symbolic number N by setting each byte to a value between 1 and
+ the byte size of SRC. The highest order byte is set to the byte size and
+ the lowest order byte to 1. */
+ n->type = TREE_TYPE (src);
+ size = TYPE_PRECISION (n->type);
+ if (size % BITS_PER_UNIT != 0)
+ return false;
+ size /= BITS_PER_UNIT;
+ if (size > 64 / BITS_PER_MARKER)
+ return false;
+ n->range = size;
+ n->n = CMPNOP;
+
+ if (size < 64 / BITS_PER_MARKER)
+ n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
+
+ return true;
+}
+
+/* Check if STMT might be a byte swap or a nop from a memory source and return
+ the answer. If so, REF is that memory source and the base of the memory area
+ accessed together with the offset of the access from that base are recorded
+ in N. */
+
+bool
+find_bswap_or_nop_load (gimple stmt, tree ref, struct symbolic_number *n)
+{
+ /* Leaf node is an array or component ref. Memorize its base and
+ offset from base to compare to other such leaf node. */
+ HOST_WIDE_INT bitsize, bitpos;
+ machine_mode mode;
+ int unsignedp, volatilep;
+ tree offset, base_addr;
+
+ if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
+ return false;
+
+ base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
+ &unsignedp, &volatilep, false);
+
+ if (TREE_CODE (base_addr) == MEM_REF)
+ {
+ offset_int bit_offset = 0;
+ tree off = TREE_OPERAND (base_addr, 1);
+
+ if (!integer_zerop (off))
+ {
+ offset_int boff, coff = mem_ref_offset (base_addr);
+ boff = wi::lshift (coff, LOG2_BITS_PER_UNIT);
+ bit_offset += boff;
+ }
+
+ base_addr = TREE_OPERAND (base_addr, 0);
+
+ /* Avoid returning a negative bitpos as this may wreak havoc later. */
+ if (wi::neg_p (bit_offset))
+ {
+ offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
+ offset_int tem = bit_offset.and_not (mask);
+ /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
+ Subtract it from BIT_OFFSET and add it (scaled) to OFFSET. */
+ bit_offset -= tem;
+ tem = wi::arshift (tem, LOG2_BITS_PER_UNIT);
+ if (offset)
+ offset = size_binop (PLUS_EXPR, offset,
+ wide_int_to_tree (sizetype, tem));
+ else
+ offset = wide_int_to_tree (sizetype, tem);
+ }
+
+ bitpos += bit_offset.to_shwi ();
+ }
+
+ if (bitpos % BITS_PER_UNIT)
+ return false;
+ if (bitsize % BITS_PER_UNIT)
+ return false;
+
+ if (!init_symbolic_number (n, ref))
+ return false;
+ n->base_addr = base_addr;
+ n->offset = offset;
+ n->bytepos = bitpos / BITS_PER_UNIT;
+ n->alias_set = reference_alias_ptr_type (ref);
+ n->vuse = gimple_vuse (stmt);
+ return true;
+}
+
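+/* For instance, for the load in   tmp = a[3];   with char a[], the recorded
+ base is A, the offset is NULL_TREE and bytepos is 3, which lets the caller
+ compare several such loads byte-wise (illustrative). */
+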
+/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
+ the operation given by the rhs of STMT on the result. If the operation
+ could be executed successfully, the function returns a gimple stmt whose
+ rhs's first tree is the expression of the source operand; otherwise it
+ returns NULL. */
+
+static gimple
+find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit)
{
enum tree_code code;
tree rhs1, rhs2 = NULL;
- gimple rhs1_stmt, rhs2_stmt;
- tree source_expr1;
+ gimple rhs1_stmt, rhs2_stmt, source_stmt1;
enum gimple_rhs_class rhs_class;
if (!limit || !is_gimple_assign (stmt))
- return NULL_TREE;
+ return NULL;
rhs1 = gimple_assign_rhs1 (stmt);
+ if (find_bswap_or_nop_load (stmt, rhs1, n))
+ return stmt;
+
if (TREE_CODE (rhs1) != SSA_NAME)
- return NULL_TREE;
+ return NULL;
code = gimple_assign_rhs_code (stmt);
rhs_class = gimple_assign_rhs_class (stmt);
&& code != RSHIFT_EXPR
&& code != LROTATE_EXPR
&& code != RROTATE_EXPR
- && code != NOP_EXPR
- && code != CONVERT_EXPR)
- return NULL_TREE;
+ && !CONVERT_EXPR_CODE_P (code))
+ return NULL;
- source_expr1 = find_bswap_1 (rhs1_stmt, n, limit - 1);
+ source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
- /* If find_bswap_1 returned NULL STMT is a leaf node and we have
- to initialize the symbolic number. */
- if (!source_expr1)
+ /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
+ we have to initialize the symbolic number. */
+ if (!source_stmt1)
{
- /* Set up the symbolic number N by setting each byte to a
- value between 1 and the byte size of rhs1. The highest
- order byte is set to n->size and the lowest order
- byte to 1. */
- n->size = TYPE_PRECISION (TREE_TYPE (rhs1));
- if (n->size % BITS_PER_UNIT != 0)
- return NULL_TREE;
- n->size /= BITS_PER_UNIT;
- n->n = (sizeof (HOST_WIDEST_INT) < 8 ? 0 :
- (unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201);
-
- if (n->size < (int)sizeof (HOST_WIDEST_INT))
- n->n &= ((unsigned HOST_WIDEST_INT)1 <<
- (n->size * BITS_PER_UNIT)) - 1;
-
- source_expr1 = rhs1;
+ if (gimple_assign_load_p (stmt)
+ || !init_symbolic_number (n, rhs1))
+ return NULL;
+ source_stmt1 = stmt;
}
switch (code)
{
case BIT_AND_EXPR:
{
- int i;
- unsigned HOST_WIDEST_INT val = widest_int_cst_value (rhs2);
- unsigned HOST_WIDEST_INT tmp = val;
+ int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+ uint64_t val = int_cst_value (rhs2), mask = 0;
+ uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
/* Only constants masking full bytes are allowed. */
- for (i = 0; i < n->size; i++, tmp >>= BITS_PER_UNIT)
- if ((tmp & 0xff) != 0 && (tmp & 0xff) != 0xff)
- return NULL_TREE;
+ for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
+ if ((val & tmp) != 0 && (val & tmp) != tmp)
+ return NULL;
+ else if (val & tmp)
+ mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
- n->n &= val;
+ n->n &= mask;
}
break;
case LSHIFT_EXPR:
case LROTATE_EXPR:
case RROTATE_EXPR:
if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2)))
- return NULL_TREE;
+ return NULL;
break;
CASE_CONVERT:
{
- int type_size;
+ int i, type_size, old_type_size;
+ tree type;
- type_size = TYPE_PRECISION (gimple_expr_type (stmt));
+ type = gimple_expr_type (stmt);
+ type_size = TYPE_PRECISION (type);
if (type_size % BITS_PER_UNIT != 0)
- return NULL_TREE;
-
- if (type_size / BITS_PER_UNIT < (int)(sizeof (HOST_WIDEST_INT)))
+ return NULL;
+ type_size /= BITS_PER_UNIT;
+ if (type_size > 64 / BITS_PER_MARKER)
+ return NULL;
+
+ /* Sign extension: result is dependent on the value. */
+ old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+ if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
+ && HEAD_MARKER (n->n, old_type_size))
+ for (i = 0; i < type_size - old_type_size; i++)
+ n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
+ << ((type_size - 1 - i) * BITS_PER_MARKER);
+
+ if (type_size < 64 / BITS_PER_MARKER)
{
/* If STMT casts to a smaller type mask out the bits not
belonging to the target type. */
- n->n &= ((unsigned HOST_WIDEST_INT)1 << type_size) - 1;
+ n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
}
- n->size = type_size / BITS_PER_UNIT;
+ n->type = type;
+ if (!n->base_addr)
+ n->range = type_size;
}
break;
default:
- return NULL_TREE;
+ return NULL;
};
- return verify_symbolic_number_p (n, stmt) ? source_expr1 : NULL;
+ return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
}
/* Handle binary rhs. */
if (rhs_class == GIMPLE_BINARY_RHS)
{
+ int i, size;
struct symbolic_number n1, n2;
- tree source_expr2;
+ uint64_t mask;
+ gimple source_stmt2;
if (code != BIT_IOR_EXPR)
- return NULL_TREE;
+ return NULL;
if (TREE_CODE (rhs2) != SSA_NAME)
- return NULL_TREE;
+ return NULL;
rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
switch (code)
{
case BIT_IOR_EXPR:
- source_expr1 = find_bswap_1 (rhs1_stmt, &n1, limit - 1);
+ source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
- if (!source_expr1)
- return NULL_TREE;
+ if (!source_stmt1)
+ return NULL;
- source_expr2 = find_bswap_1 (rhs2_stmt, &n2, limit - 1);
+ source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
- if (source_expr1 != source_expr2
- || n1.size != n2.size)
- return NULL_TREE;
+ if (!source_stmt2)
+ return NULL;
+
+ if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
+ return NULL;
+
+ if (!n1.vuse != !n2.vuse
+ || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
+ return NULL;
- n->size = n1.size;
+ if (gimple_assign_rhs1 (source_stmt1)
+ != gimple_assign_rhs1 (source_stmt2))
+ {
+ int64_t inc;
+ HOST_WIDE_INT off_sub;
+ struct symbolic_number *n_ptr;
+
+ if (!n1.base_addr || !n2.base_addr
+ || !operand_equal_p (n1.base_addr, n2.base_addr, 0))
+ return NULL;
+ if (!n1.offset != !n2.offset
+ || (n1.offset && !operand_equal_p (n1.offset, n2.offset, 0)))
+ return NULL;
+
+ /* We swap n1 with n2 to have n1 < n2. */
+ if (n2.bytepos < n1.bytepos)
+ {
+ struct symbolic_number tmpn;
+
+ tmpn = n2;
+ n2 = n1;
+ n1 = tmpn;
+ source_stmt1 = source_stmt2;
+ }
+
+ off_sub = n2.bytepos - n1.bytepos;
+
+ /* Check that the range of memory covered can be represented by
+ a symbolic number. */
+ if (off_sub + n2.range > 64 / BITS_PER_MARKER)
+ return NULL;
+ n->range = n2.range + off_sub;
+
+ /* Reinterpret the byte marks in the symbolic number holding the bytes
+ of bigger weight, according to target endianness. */
+ inc = BYTES_BIG_ENDIAN ? off_sub + n2.range - n1.range : off_sub;
+ size = TYPE_PRECISION (n1.type) / BITS_PER_UNIT;
+ if (BYTES_BIG_ENDIAN)
+ n_ptr = &n1;
+ else
+ n_ptr = &n2;
+ for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
+ {
+ unsigned marker =
+ (n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
+ if (marker && marker != MARKER_BYTE_UNKNOWN)
+ n_ptr->n += inc;
+ }
+ }
+ else
+ n->range = n1.range;
+
+ if (!n1.alias_set
+ || alias_ptr_types_compatible_p (n1.alias_set, n2.alias_set))
+ n->alias_set = n1.alias_set;
+ else
+ n->alias_set = ptr_type_node;
+ n->vuse = n1.vuse;
+ n->base_addr = n1.base_addr;
+ n->offset = n1.offset;
+ n->bytepos = n1.bytepos;
+ n->type = n1.type;
+ size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+ for (i = 0, mask = MARKER_MASK; i < size;
+ i++, mask <<= BITS_PER_MARKER)
+ {
+ uint64_t masked1, masked2;
+
+ masked1 = n1.n & mask;
+ masked2 = n2.n & mask;
+ if (masked1 && masked2 && masked1 != masked2)
+ return NULL;
+ }
n->n = n1.n | n2.n;
if (!verify_symbolic_number_p (n, stmt))
- return NULL_TREE;
+ return NULL;
break;
default:
- return NULL_TREE;
+ return NULL;
}
- return source_expr1;
+ return source_stmt1;
}
- return NULL_TREE;
+ return NULL;
}
-/* Check if STMT completes a bswap implementation consisting of ORs,
- SHIFTs and ANDs. Return the source tree expression on which the
- byte swap is performed and NULL if no bswap was found. */
+/* Check if STMT completes a bswap implementation, or a read in a given
+ endianness, consisting of ORs, SHIFTs and ANDs, and set *BSWAP
+ accordingly. It also sets N to describe the operations performed: the
+ size of the resulting expression and whether it works on a memory
+ source, and if so the alias set and vuse. Finally, the function
+ returns a stmt whose rhs's first tree is the source expression. */
-static tree
-find_bswap (gimple stmt)
+static gimple
+find_bswap_or_nop (gimple stmt, struct symbolic_number *n, bool *bswap)
{
-/* The number which the find_bswap result should match in order to
- have a full byte swap. The number is shifted to the left according
- to the size of the symbolic number before using it. */
- unsigned HOST_WIDEST_INT cmp =
- sizeof (HOST_WIDEST_INT) < 8 ? 0 :
- (unsigned HOST_WIDEST_INT)0x01020304 << 32 | 0x05060708;
-
- struct symbolic_number n;
- tree source_expr;
+/* The number which the find_bswap_or_nop_1 result should match in order
+ to have a full byte swap. The number is shifted to the right
+ according to the size of the symbolic number before using it. */
+ uint64_t cmpxchg = CMPXCHG;
+ uint64_t cmpnop = CMPNOP;
+
+ gimple source_stmt;
int limit;
/* The last parameter determines the depth search limit. It usually
- correlates directly to the number of bytes to be touched. We
- increase that number by three here in order to also
- cover signed -> unsigned converions of the src operand as can be seen
+ correlates directly to the number n of bytes to be touched. We
+ increase that number by log2(n) + 1 here in order to also
+ cover signed -> unsigned conversions of the src operand as can be seen
in libgcc, and for an initial shift/and operation of the src operand. */
limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
- source_expr = find_bswap_1 (stmt, &n, limit);
+ source_stmt = find_bswap_or_nop_1 (stmt, n, limit);
- if (!source_expr)
- return NULL_TREE;
+ if (!source_stmt)
+ return NULL;
- /* Zero out the extra bits of N and CMP. */
- if (n.size < (int)sizeof (HOST_WIDEST_INT))
+ /* Find real size of result (highest non-zero byte). */
+ if (n->base_addr)
{
- unsigned HOST_WIDEST_INT mask =
- ((unsigned HOST_WIDEST_INT)1 << (n.size * BITS_PER_UNIT)) - 1;
+ int rsize;
+ uint64_t tmpn;
- n.n &= mask;
- cmp >>= (sizeof (HOST_WIDEST_INT) - n.size) * BITS_PER_UNIT;
+ for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
+ n->range = rsize;
}
- /* A complete byte swap should make the symbolic number to start
- with the largest digit in the highest order byte. */
- if (cmp != n.n)
- return NULL_TREE;
+ /* Zero out the extra bits of N and CMP*. */
+ if (n->range < (int) sizeof (int64_t))
+ {
+ uint64_t mask;
- return source_expr;
+ mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
+ cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
+ cmpnop &= mask;
+ }
+
+ /* A complete byte swap should make the symbolic number start with the
+ largest digit in the highest order byte. An unchanged symbolic number
+ indicates a read with the same endianness as the target architecture. */
+ if (n->n == cmpnop)
+ *bswap = false;
+ else if (n->n == cmpxchg)
+ *bswap = true;
+ else
+ return NULL;
+
+ /* A nop on a non-memory source is useless bit manipulation; there is
+ nothing to replace. */
+ if (!n->base_addr && n->n == cmpnop)
+ return NULL;
+
+ n->range *= BITS_PER_UNIT;
+ return source_stmt;
}
-/* Find manual byte swap implementations and turn them into a bswap
- builtin invokation. */
+namespace {
+
+const pass_data pass_data_optimize_bswap =
+{
+ GIMPLE_PASS, /* type */
+ "bswap", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
-static unsigned int
-execute_optimize_bswap (void)
+class pass_optimize_bswap : public gimple_opt_pass
+{
+public:
+ pass_optimize_bswap (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *)
+ {
+ return flag_expensive_optimizations && optimize;
+ }
+
+ virtual unsigned int execute (function *);
+
+}; // class pass_optimize_bswap
+
+/* Perform the bswap optimization: replace the expression computed in the rhs
+ of CUR_STMT by an equivalent bswap, load or load + bswap expression.
+ Which of these alternatives replaces the rhs is given by N->base_addr (non
+ null if a load is needed) and BSWAP. The type, VUSE and alias set of the
+ load to perform are also given in N, while the builtin bswap to invoke is
+ given in FNDECL. Finally, if a load is involved, SRC_STMT refers to one of
+ the load statements used to construct the rhs in CUR_STMT, and N->range
+ gives the size of the rhs expression for maintaining some statistics.
+
+ Note that if the replacement involves a load, CUR_STMT is moved just before
+ SRC_STMT to do the load with the same VUSE, which can move CUR_STMT to a
+ different basic block. */
+
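+/* For instance, on a little-endian target
+
+     x = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24);
+
+ with unsigned char d[] matches the target endianness and becomes a single
+ 32-bit load, while the byte-reversed variant becomes a load followed by a
+ __builtin_bswap32 call (a sketch of the intent; D is hypothetical). */
+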
+static bool
+bswap_replace (gimple cur_stmt, gimple src_stmt, tree fndecl, tree bswap_type,
+ tree load_type, struct symbolic_number *n, bool bswap)
+{
+ gimple_stmt_iterator gsi;
+ tree src, tmp, tgt;
+ gimple bswap_stmt;
+
+ gsi = gsi_for_stmt (cur_stmt);
+ src = gimple_assign_rhs1 (src_stmt);
+ tgt = gimple_assign_lhs (cur_stmt);
+
+ /* Need to load the value from memory first. */
+ if (n->base_addr)
+ {
+ gimple_stmt_iterator gsi_ins = gsi_for_stmt (src_stmt);
+ tree addr_expr, addr_tmp, val_expr, val_tmp;
+ tree load_offset_ptr, aligned_load_type;
+ gimple addr_stmt, load_stmt;
+ unsigned align;
+
+ align = get_object_alignment (src);
+ if (bswap
+ && align < GET_MODE_ALIGNMENT (TYPE_MODE (load_type))
+ && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align))
+ return false;
+
+ /* Move cur_stmt just before one of the loads of the original
+ expression to ensure it has the same VUSE. See PR61517 for
+ what could go wrong. */
+ gsi_move_before (&gsi, &gsi_ins);
+ gsi = gsi_for_stmt (cur_stmt);
+
+ /* Compute address to load from and cast according to the size
+ of the load. */
+ addr_expr = build_fold_addr_expr (unshare_expr (src));
+ if (is_gimple_min_invariant (addr_expr))
+ addr_tmp = addr_expr;
+ else
+ {
+ addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
+ "load_src");
+ addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
+ gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
+ }
+
+ /* Perform the load. */
+ aligned_load_type = load_type;
+ if (align < TYPE_ALIGN (load_type))
+ aligned_load_type = build_aligned_type (load_type, align);
+ load_offset_ptr = build_int_cst (n->alias_set, 0);
+ val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
+ load_offset_ptr);
+
+ if (!bswap)
+ {
+ if (n->range == 16)
+ nop_stats.found_16bit++;
+ else if (n->range == 32)
+ nop_stats.found_32bit++;
+ else
+ {
+ gcc_assert (n->range == 64);
+ nop_stats.found_64bit++;
+ }
+
+ /* Convert the result of the load if necessary. */
+ if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
+ {
+ val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
+ "load_dst");
+ load_stmt = gimple_build_assign (val_tmp, val_expr);
+ gimple_set_vuse (load_stmt, n->vuse);
+ gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
+ gimple_assign_set_rhs_with_ops_1 (&gsi, NOP_EXPR, val_tmp,
+ NULL_TREE, NULL_TREE);
+ }
+ else
+ {
+ gimple_assign_set_rhs_with_ops_1 (&gsi, MEM_REF, val_expr,
+ NULL_TREE, NULL_TREE);
+ gimple_set_vuse (cur_stmt, n->vuse);
+ }
+ update_stmt (cur_stmt);
+
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "%d bit load in target endianness found at: ",
+ (int)n->range);
+ print_gimple_stmt (dump_file, cur_stmt, 0, 0);
+ }
+ return true;
+ }
+ else
+ {
+ val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
+ load_stmt = gimple_build_assign (val_tmp, val_expr);
+ gimple_set_vuse (load_stmt, n->vuse);
+ gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
+ }
+ src = val_tmp;
+ }
+
+ if (n->range == 16)
+ bswap_stats.found_16bit++;
+ else if (n->range == 32)
+ bswap_stats.found_32bit++;
+ else
+ {
+ gcc_assert (n->range == 64);
+ bswap_stats.found_64bit++;
+ }
+
+ tmp = src;
+
+ /* The canonical form for a 16-bit bswap is a rotate expression. Only 16-bit
+ values are handled this way, as rotating a 2N-bit value by N bits is
+ generally not equivalent to a bswap. Consider, for instance, 0x01020304
+ rotated by 16 bits, which gives 0x03040102, while its bswap is
+ 0x04030201. */
+ if (bswap && n->range == 16)
+ {
+ tree count = build_int_cst (NULL, BITS_PER_UNIT);
+ bswap_type = TREE_TYPE (src);
+ src = fold_build2 (LROTATE_EXPR, bswap_type, src, count);
+ bswap_stmt = gimple_build_assign (NULL, src);
+ }
+ else
+ {
+ /* Convert the src expression if necessary. */
+ if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
+ {
+ gimple convert_stmt;
+ tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
+ convert_stmt = gimple_build_assign_with_ops (NOP_EXPR, tmp, src,
+ NULL);
+ gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
+ }
+
+ bswap_stmt = gimple_build_call (fndecl, 1, tmp);
+ }
+
+ tmp = tgt;
+
+ /* Convert the result if necessary. */
+ if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
+ {
+ gimple convert_stmt;
+ tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
+ convert_stmt = gimple_build_assign_with_ops (NOP_EXPR, tgt, tmp, NULL);
+ gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
+ }
+
+ gimple_set_lhs (bswap_stmt, tmp);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "%d bit bswap implementation found at: ",
+ (int)n->range);
+ print_gimple_stmt (dump_file, cur_stmt, 0, 0);
+ }
+
+ gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
+ gsi_remove (&gsi, true);
+ return true;
+}
+
+/* Find manual byte swap implementations as well as loads in a given
+ endianness. Byte swaps are turned into a bswap builtin invocation,
+ while endian loads are converted to a bswap builtin invocation or a
+ simple load, according to the target endianness. */
+
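+/* For example, a hand-written swap of a 16-bit register value such as
+
+     ((x & 0xff) << 8) | ((x >> 8) & 0xff)
+
+ is rewritten into its canonical rotate form, while the 32- and 64-bit
+ analogues become __builtin_bswap32/64 calls (illustrative). */
+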
+unsigned int
+pass_optimize_bswap::execute (function *fun)
{
basic_block bb;
- bool bswap32_p, bswap64_p;
+ bool bswap16_p, bswap32_p, bswap64_p;
bool changed = false;
- tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
+ tree bswap16_type = NULL_TREE, bswap32_type = NULL_TREE,
+ bswap64_type = NULL_TREE;
if (BITS_PER_UNIT != 8)
return 0;
- if (sizeof (HOST_WIDEST_INT) < 8)
- return 0;
-
+ bswap16_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP16)
+ && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing);
bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
&& optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
&& (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
|| (bswap32_p && word_mode == SImode)));
- if (!bswap32_p && !bswap64_p)
- return 0;
-
/* Determine the argument type of the builtins. The code later on
assumes that the return and argument type are the same. */
+ if (bswap16_p)
+ {
+ tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP16);
+ bswap16_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
+ }
+
if (bswap32_p)
{
tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
}
+ memset (&nop_stats, 0, sizeof (nop_stats));
memset (&bswap_stats, 0, sizeof (bswap_stats));
- FOR_EACH_BB (bb)
+ FOR_EACH_BB_FN (bb, fun)
{
gimple_stmt_iterator gsi;
/* We do a reverse scan for bswap patterns to make sure we get the
- widest match. As bswap pattern matching doesn't handle
- previously inserted smaller bswap replacements as sub-
- patterns, the wider variant wouldn't be detected. */
- for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
+ widest match. As bswap pattern matching doesn't handle previously
+ inserted smaller bswap replacements as sub-patterns, the wider
+ variant wouldn't be detected. */
+ for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
{
- gimple stmt = gsi_stmt (gsi);
- tree bswap_src, bswap_type;
- tree bswap_tmp;
- tree fndecl = NULL_TREE;
- int type_size;
- gimple call;
-
- if (!is_gimple_assign (stmt)
- || gimple_assign_rhs_code (stmt) != BIT_IOR_EXPR)
+ gimple src_stmt, cur_stmt = gsi_stmt (gsi);
+ tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
+ enum tree_code code;
+ struct symbolic_number n;
+ bool bswap;
+
+ /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
+ might be moved to a different basic block by bswap_replace and gsi
+ must not point to it in that case. Doing the gsi_prev here makes
+ sure that gsi points to the statement previous to cur_stmt while
+ still ensuring that all statements in this basic block are
+ considered. */
+ gsi_prev (&gsi);
+
+ if (!is_gimple_assign (cur_stmt))
continue;
- type_size = TYPE_PRECISION (gimple_expr_type (stmt));
+ code = gimple_assign_rhs_code (cur_stmt);
+ switch (code)
+ {
+ case LROTATE_EXPR:
+ case RROTATE_EXPR:
+ if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
+ || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
+ % BITS_PER_UNIT)
+ continue;
+ /* Fall through. */
+ case BIT_IOR_EXPR:
+ break;
+ default:
+ continue;
+ }
+
+ src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
- switch (type_size)
+ if (!src_stmt)
+ continue;
+
+ switch (n.range)
{
+ case 16:
+ /* Already in canonical form, nothing to do. */
+ if (code == LROTATE_EXPR || code == RROTATE_EXPR)
+ continue;
+ load_type = uint16_type_node;
+ if (bswap16_p)
+ {
+ fndecl = builtin_decl_explicit (BUILT_IN_BSWAP16);
+ bswap_type = bswap16_type;
+ }
+ break;
case 32:
+ load_type = uint32_type_node;
if (bswap32_p)
{
fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
}
break;
case 64:
+ load_type = uint64_type_node;
if (bswap64_p)
{
fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
continue;
}
- if (!fndecl)
+ if (bswap && !fndecl)
continue;
- bswap_src = find_bswap (stmt);
+ if (bswap_replace (cur_stmt, src_stmt, fndecl, bswap_type, load_type,
+ &n, bswap))
+ changed = true;
+ }
+ }
- if (!bswap_src)
- continue;
+ statistics_counter_event (fun, "16-bit nop implementations found",
+ nop_stats.found_16bit);
+ statistics_counter_event (fun, "32-bit nop implementations found",
+ nop_stats.found_32bit);
+ statistics_counter_event (fun, "64-bit nop implementations found",
+ nop_stats.found_64bit);
+ statistics_counter_event (fun, "16-bit bswap implementations found",
+ bswap_stats.found_16bit);
+ statistics_counter_event (fun, "32-bit bswap implementations found",
+ bswap_stats.found_32bit);
+ statistics_counter_event (fun, "64-bit bswap implementations found",
+ bswap_stats.found_64bit);
- changed = true;
- if (type_size == 32)
- bswap_stats.found_32bit++;
- else
- bswap_stats.found_64bit++;
+ return (changed ? TODO_update_ssa : 0);
+}
- bswap_tmp = bswap_src;
+} // anon namespace
- /* Convert the src expression if necessary. */
- if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
- {
- gimple convert_stmt;
+gimple_opt_pass *
+make_pass_optimize_bswap (gcc::context *ctxt)
+{
+ return new pass_optimize_bswap (ctxt);
+}
- bswap_tmp = create_tmp_var (bswap_type, "bswapsrc");
- add_referenced_var (bswap_tmp);
- bswap_tmp = make_ssa_name (bswap_tmp, NULL);
+/* Return true if STMT is a type conversion operation that can be stripped
+ when used in a widening multiply operation. */
+static bool
+widening_mult_conversion_strippable_p (tree result_type, gimple stmt)
+{
+ enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
- convert_stmt = gimple_build_assign_with_ops (
- CONVERT_EXPR, bswap_tmp, bswap_src, NULL);
- gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
- }
+ if (TREE_CODE (result_type) == INTEGER_TYPE)
+ {
+ tree op_type;
+ tree inner_op_type;
- call = gimple_build_call (fndecl, 1, bswap_tmp);
+ if (!CONVERT_EXPR_CODE_P (rhs_code))
+ return false;
- bswap_tmp = gimple_assign_lhs (stmt);
+ op_type = TREE_TYPE (gimple_assign_lhs (stmt));
- /* Convert the result if necessary. */
- if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
- {
- gimple convert_stmt;
-
- bswap_tmp = create_tmp_var (bswap_type, "bswapdst");
- add_referenced_var (bswap_tmp);
- bswap_tmp = make_ssa_name (bswap_tmp, NULL);
- convert_stmt = gimple_build_assign_with_ops (
- CONVERT_EXPR, gimple_assign_lhs (stmt), bswap_tmp, NULL);
- gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
- }
+ /* If the type of OP has the same precision as the result, then
+ we can strip this conversion. The multiply operation will be
+ selected to create the correct extension as a by-product. */
+ if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
+ return true;
- gimple_call_set_lhs (call, bswap_tmp);
+ /* We can also strip a conversion if it preserves the signed-ness of
+ the operation and doesn't narrow the range. */
+ inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
- if (dump_file)
- {
- fprintf (dump_file, "%d bit bswap implementation found at: ",
- (int)type_size);
- print_gimple_stmt (dump_file, stmt, 0, 0);
- }
+ /* If the innermost type is unsigned, then we can strip any
+ intermediate widening operation. If it's signed, then the
+ intermediate widening operation must also be signed. */
+ if ((TYPE_UNSIGNED (inner_op_type)
+ || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
+ && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
+ return true;
- gsi_insert_after (&gsi, call, GSI_SAME_STMT);
- gsi_remove (&gsi, true);
- }
+ return false;
}
- statistics_counter_event (cfun, "32-bit bswap implementations found",
- bswap_stats.found_32bit);
- statistics_counter_event (cfun, "64-bit bswap implementations found",
- bswap_stats.found_64bit);
-
- return (changed ? TODO_update_ssa | TODO_verify_ssa
- | TODO_verify_stmts : 0);
+ return rhs_code == FIXED_CONVERT_EXPR;
}
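+
+/* For illustration (a sketch, not from the original patch): the first
+   rule strips a cast whose result already has the result precision,
+   e.g. both (long) casts in (long) a * (long) b with 64-bit long.  The
+   second rule additionally strips an intermediate widening cast, as in
+
+     unsigned char c;
+     long l = (long) (unsigned int) c * 2;
+
+   where the innermost type is unsigned, so the zero-extension to
+   unsigned int is subsumed by the wider extension that the widening
+   multiply itself performs.  */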
-static bool
-gate_optimize_bswap (void)
-{
- return flag_expensive_optimizations && optimize;
-}
-
-struct gimple_opt_pass pass_optimize_bswap =
-{
- {
- GIMPLE_PASS,
- "bswap", /* name */
- gate_optimize_bswap, /* gate */
- execute_optimize_bswap, /* execute */
- NULL, /* sub */
- NULL, /* next */
- 0, /* static_pass_number */
- TV_NONE, /* tv_id */
- PROP_ssa, /* properties_required */
- 0, /* properties_provided */
- 0, /* properties_destroyed */
- 0, /* todo_flags_start */
- 0 /* todo_flags_finish */
- }
-};
-
/* Return true if RHS is a suitable operand for a widening multiplication,
assuming a target type of TYPE.
There are two cases:
{
gimple stmt;
tree type1, rhs1;
- enum tree_code rhs_code;
if (TREE_CODE (rhs) == SSA_NAME)
{
stmt = SSA_NAME_DEF_STMT (rhs);
if (is_gimple_assign (stmt))
{
- rhs_code = gimple_assign_rhs_code (stmt);
- if (TREE_CODE (type) == INTEGER_TYPE
- ? !CONVERT_EXPR_CODE_P (rhs_code)
- : rhs_code != FIXED_CONVERT_EXPR)
+ if (! widening_mult_conversion_strippable_p (type, stmt))
rhs1 = rhs;
else
{
static bool
convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
{
- tree lhs, rhs1, rhs2, type, type1, type2, tmp = NULL;
+ tree lhs, rhs1, rhs2, type, type1, type2;
enum insn_code handler;
- enum machine_mode to_mode, from_mode, actual_mode;
+ machine_mode to_mode, from_mode, actual_mode;
optab op;
int actual_precision;
location_t loc = gimple_location (stmt);
return false;
if (actual_precision != TYPE_PRECISION (type1)
|| from_unsigned1 != TYPE_UNSIGNED (type1))
- {
- tmp = create_tmp_var (build_nonstandard_integer_type
- (actual_precision, from_unsigned1),
- NULL);
- rhs1 = build_and_insert_cast (gsi, loc, tmp, rhs1);
- }
+ rhs1 = build_and_insert_cast (gsi, loc,
+ build_nonstandard_integer_type
+ (actual_precision, from_unsigned1), rhs1);
if (actual_precision != TYPE_PRECISION (type2)
|| from_unsigned2 != TYPE_UNSIGNED (type2))
- {
- /* Reuse the same type info, if possible. */
- if (!tmp || from_unsigned1 != from_unsigned2)
- tmp = create_tmp_var (build_nonstandard_integer_type
- (actual_precision, from_unsigned2),
- NULL);
- rhs2 = build_and_insert_cast (gsi, loc, tmp, rhs2);
- }
+ rhs2 = build_and_insert_cast (gsi, loc,
+ build_nonstandard_integer_type
+ (actual_precision, from_unsigned2), rhs2);
/* Handle constants. */
if (TREE_CODE (rhs1) == INTEGER_CST)
{
gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt;
- tree type, type1, type2, optype, tmp = NULL;
+ tree type, type1, type2, optype;
tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
optab this_optab;
enum tree_code wmult_code;
enum insn_code handler;
- enum machine_mode to_mode, from_mode, actual_mode;
+ machine_mode to_mode, from_mode, actual_mode;
location_t loc = gimple_location (stmt);
int actual_precision;
bool from_unsigned1, from_unsigned2;
It might also appear that it would be sufficient to use the existing
operands of the widening multiply, but that would limit the choice of
- multiply-and-accumulate instructions. */
+ multiply-and-accumulate instructions.
+
+ If the widened-multiplication result has more than one use, it is
+ probably wiser not to do the conversion. */
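+
+  /* For example (an illustration, not from the original patch): in
+
+       w = (long) a * (long) b;
+       x = w + c;
+       y = w + d;
+
+     converting the multiply feeding X into a WIDEN_MULT_PLUS_EXPR would
+     still leave Y needing the full product W, so the multiply cannot be
+     removed and the conversion gains nothing.  */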
if (code == PLUS_EXPR
&& (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
{
- if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
- &type2, &mult_rhs2))
+ if (!has_single_use (rhs1)
+ || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
+ &type2, &mult_rhs2))
return false;
add_rhs = rhs2;
conv_stmt = conv1_stmt;
}
else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
{
- if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
- &type2, &mult_rhs2))
+ if (!has_single_use (rhs2)
+ || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
+ &type2, &mult_rhs2))
return false;
add_rhs = rhs1;
conv_stmt = conv2_stmt;
actual_precision = GET_MODE_PRECISION (actual_mode);
if (actual_precision != TYPE_PRECISION (type1)
|| from_unsigned1 != TYPE_UNSIGNED (type1))
- {
- tmp = create_tmp_var (build_nonstandard_integer_type
- (actual_precision, from_unsigned1),
- NULL);
- mult_rhs1 = build_and_insert_cast (gsi, loc, tmp, mult_rhs1);
- }
+ mult_rhs1 = build_and_insert_cast (gsi, loc,
+ build_nonstandard_integer_type
+ (actual_precision, from_unsigned1),
+ mult_rhs1);
if (actual_precision != TYPE_PRECISION (type2)
|| from_unsigned2 != TYPE_UNSIGNED (type2))
- {
- if (!tmp || from_unsigned1 != from_unsigned2)
- tmp = create_tmp_var (build_nonstandard_integer_type
- (actual_precision, from_unsigned2),
- NULL);
- mult_rhs2 = build_and_insert_cast (gsi, loc, tmp, mult_rhs2);
- }
+ mult_rhs2 = build_and_insert_cast (gsi, loc,
+ build_nonstandard_integer_type
+ (actual_precision, from_unsigned2),
+ mult_rhs2);
if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
- add_rhs = build_and_insert_cast (gsi, loc, create_tmp_var (type, NULL),
- add_rhs);
+ add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
/* Handle constants. */
if (TREE_CODE (mult_rhs1) == INTEGER_CST)
return false;
}
+ /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
+ by a MULT_EXPR that we'll visit later, we might be able to
+ get a more profitable match with fnma.
+ OTOH, if we don't, a negate / fma pair likely has lower latency
+ than a mult / subtract pair.  */
+ if (use_code == MINUS_EXPR && !negate_p
+ && gimple_assign_rhs1 (use_stmt) == result
+ && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
+ && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
+ {
+ tree rhs2 = gimple_assign_rhs2 (use_stmt);
+
+ if (TREE_CODE (rhs2) == SSA_NAME)
+ {
+ gimple stmt2 = SSA_NAME_DEF_STMT (rhs2);
+ if (has_single_use (rhs2)
+ && is_gimple_assign (stmt2)
+ && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
+ return false;
+ }
+ }
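+
+  /* A sketch of the case above (illustration, not from the patch): for
+
+       tmp = a * b;
+       x = tmp - c * d;
+
+     deferring here lets the later visit of c * d form
+     x = FMA (-c, d, tmp), matching the target's fnma pattern, instead of
+     demanding an fms pattern the target lacks.  */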
+
/* We can't handle a * b + a * b. */
if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
return false;
true, NULL_TREE, true,
GSI_SAME_STMT);
- fma_stmt = gimple_build_assign_with_ops3 (FMA_EXPR,
- gimple_assign_lhs (use_stmt),
- mulop1, op2,
- addop);
+ fma_stmt = gimple_build_assign_with_ops (FMA_EXPR,
+ gimple_assign_lhs (use_stmt),
+ mulop1, op2,
+ addop);
gsi_replace (&gsi, fma_stmt, true);
widen_mul_stats.fmas_inserted++;
}
smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
where appropriate. */
-static unsigned int
-execute_optimize_widening_mul (void)
+namespace {
+
+const pass_data pass_data_optimize_widening_mul =
+{
+ GIMPLE_PASS, /* type */
+ "widening_mul", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_optimize_widening_mul : public gimple_opt_pass
+{
+public:
+ pass_optimize_widening_mul (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *)
+ {
+ return flag_expensive_optimizations && optimize;
+ }
+
+ virtual unsigned int execute (function *);
+
+}; // class pass_optimize_widening_mul
+
+unsigned int
+pass_optimize_widening_mul::execute (function *fun)
{
basic_block bb;
bool cfg_changed = false;
memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
- FOR_EACH_BB (bb)
+ FOR_EACH_BB_FN (bb, fun)
{
gimple_stmt_iterator gsi;
}
}
- statistics_counter_event (cfun, "widening multiplications inserted",
+ statistics_counter_event (fun, "widening multiplications inserted",
widen_mul_stats.widen_mults_inserted);
- statistics_counter_event (cfun, "widening maccs inserted",
+ statistics_counter_event (fun, "widening maccs inserted",
widen_mul_stats.maccs_inserted);
- statistics_counter_event (cfun, "fused multiply-adds inserted",
+ statistics_counter_event (fun, "fused multiply-adds inserted",
widen_mul_stats.fmas_inserted);
return cfg_changed ? TODO_cleanup_cfg : 0;
}
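+
+/* A sketch of the core transform (illustration only): with 32-bit int
+   and 64-bit long long,
+
+     long long
+     f (int a, int b)
+     {
+       return (long long) a * (long long) b;
+     }
+
+   has its MULT_EXPR rewritten to WIDEN_MULT_EXPR <a, b>, which can
+   expand to a single mulsidi3-style widening multiply instead of two
+   sign extensions and a full-width multiply.  */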
-static bool
-gate_optimize_widening_mul (void)
-{
- return flag_expensive_optimizations && optimize;
-}
+} // anon namespace
-struct gimple_opt_pass pass_optimize_widening_mul =
+gimple_opt_pass *
+make_pass_optimize_widening_mul (gcc::context *ctxt)
{
- {
- GIMPLE_PASS,
- "widening_mul", /* name */
- gate_optimize_widening_mul, /* gate */
- execute_optimize_widening_mul, /* execute */
- NULL, /* sub */
- NULL, /* next */
- 0, /* static_pass_number */
- TV_NONE, /* tv_id */
- PROP_ssa, /* properties_required */
- 0, /* properties_provided */
- 0, /* properties_destroyed */
- 0, /* todo_flags_start */
- TODO_verify_ssa
- | TODO_verify_stmts
- | TODO_update_ssa /* todo_flags_finish */
- }
-};
+ return new pass_optimize_widening_mul (ctxt);
+}