+2016-04-27 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/70155
+ * config/i386/i386.c (scalar_to_vector_candidate_p): Renamed
+ to ...
+ (dimode_scalar_to_vector_candidate_p): This.
+ (timode_scalar_to_vector_candidate_p): New function.
+ (scalar_to_vector_candidate_p): Likewise.
+ (timode_check_non_convertible_regs): Likewise.
+ (timode_remove_non_convertible_regs): Likewise.
+ (remove_non_convertible_regs): Likewise.
+ (remove_non_convertible_regs): Renamed to ...
+ (dimode_remove_non_convertible_regs): This.
+ (scalar_chain::~scalar_chain): Make it virtual.
+ (scalar_chain::compute_convert_gain): Make it pure virtual.
+ (scalar_chain::mark_dual_mode_def): Likewise.
+ (scalar_chain::convert_insn): Likewise.
+ (scalar_chain::convert_registers): Likewise.
+ (scalar_chain::add_to_queue): Make it protected.
+ (scalar_chain::emit_conversion_insns): Likewise.
+ (scalar_chain::replace_with_subreg): Likewise.
+ (scalar_chain::replace_with_subreg_in_insn): Likewise.
+ (scalar_chain::convert_op): Likewise.
+ (scalar_chain::convert_reg): Likewise.
+ (scalar_chain::make_vector_copies): Likewise.
+ (scalar_chain::convert_registers): New pure virtual function.
+ (class dimode_scalar_chain): New class.
+ (class timode_scalar_chain): Likewise.
+ (scalar_chain::mark_dual_mode_def): Renamed to ...
+ (dimode_scalar_chain::mark_dual_mode_def): This.
+ (timode_scalar_chain::mark_dual_mode_def): New function.
+ (timode_scalar_chain::convert_insn): Likewise.
+ (dimode_scalar_chain::convert_registers): Likewise.
+ (scalar_chain::compute_convert_gain): Renamed to ...
+ (dimode_scalar_chain::compute_convert_gain): This.
+ (scalar_chain::replace_with_subreg): Renamed to ...
+ (dimode_scalar_chain::replace_with_subreg): This.
+ (scalar_chain::replace_with_subreg_in_insn): Renamed to ...
+ (dimode_scalar_chain::replace_with_subreg_in_insn): This.
+ (scalar_chain::make_vector_copies): Renamed to ...
+ (dimode_scalar_chain::make_vector_copies): This.
+ (scalar_chain::convert_reg): Renamed to ...
+ (dimode_scalar_chain::convert_reg ): This.
+ (scalar_chain::convert_op): Renamed to ...
+ (dimode_scalar_chain::convert_op): This.
+ (scalar_chain::convert_insn): Renamed to ...
+ (dimode_scalar_chain::convert_insn): This.
+ (scalar_chain::convert): Call convert_registers.
+ (convert_scalars_to_vector): Change to scalar_chain pointer to
+ use timode_scalar_chain in 64-bit mode and dimode_scalar_chain
+ in 32-bit mode. Delete scalar_chain pointer. Call
+ free_dominance_info in 64-bit mode.
+ (pass_stv::gate): Remove TARGET_64BIT check.
+ (ix86_option_override): Put the 64-bit STV pass before the CSE
+ pass.
+
2016-04-27 Pierre-Marie de Rodat <derodat@adacore.com>
* dwarf2out.h (struct dw_loc_descr_node): Remove the
return true;
}
-/* Return 1 if INSN may be converted into vector
- instruction. */
+/* The DImode version of scalar_to_vector_candidate_p. */
static bool
-scalar_to_vector_candidate_p (rtx_insn *insn)
+dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
{
rtx def_set = single_set (insn);
return true;
}
-/* For a given bitmap of insn UIDs scans all instruction and
- remove insn from CANDIDATES in case it has both convertible
- and not convertible definitions.
+/* The TImode version of scalar_to_vector_candidate_p. */
- All insns in a bitmap are conversion candidates according to
- scalar_to_vector_candidate_p. Currently it implies all insns
- are single_set. */
+static bool
+timode_scalar_to_vector_candidate_p (rtx_insn *insn)
+{
+ rtx def_set = single_set (insn);
+
+ if (!def_set)
+ return false;
+
+ if (has_non_address_hard_reg (insn))
+ return false;
+
+ rtx src = SET_SRC (def_set);
+ rtx dst = SET_DEST (def_set);
+
+ /* Only TImode load and store are allowed. */
+ if (GET_MODE (dst) != TImode)
+ return false;
+
+ if (MEM_P (dst))
+ {
+ /* Check for store. Only support store from register or standard
+ SSE constants. Memory must be aligned or unaligned store is
+ optimal. */
+ if (misaligned_operand (dst, TImode)
+ && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
+ return false;
+
+ switch (GET_CODE (src))
+ {
+ default:
+ return false;
+
+ case REG:
+ return true;
+
+ case CONST_INT:
+ return standard_sse_constant_p (src, TImode);
+ }
+ }
+ else if (MEM_P (src))
+ {
+ /* Check for load. Memory must be aligned or unaligned load is
+ optimal. */
+ return (REG_P (dst)
+ && (!misaligned_operand (src, TImode)
+ || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
+ }
+
+ return false;
+}
+
+/* Return 1 if INSN may be converted into vector
+ instruction. */
+
+static bool
+scalar_to_vector_candidate_p (rtx_insn *insn)
+{
+ if (TARGET_64BIT)
+ return timode_scalar_to_vector_candidate_p (insn);
+ else
+ return dimode_scalar_to_vector_candidate_p (insn);
+}
+
+/* The DImode version of remove_non_convertible_regs. */
static void
-remove_non_convertible_regs (bitmap candidates)
+dimode_remove_non_convertible_regs (bitmap candidates)
{
bitmap_iterator bi;
unsigned id;
BITMAP_FREE (regs);
}
+/* For a register REGNO, scan instructions for its defs and uses.
+ Put REGNO in REGS if a def or use isn't in CANDIDATES. */
+
+static void
+timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
+ unsigned int regno)
+{
+ for (df_ref def = DF_REG_DEF_CHAIN (regno);
+ def;
+ def = DF_REF_NEXT_REG (def))
+ {
+ if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "r%d has non convertible def in insn %d\n",
+ regno, DF_REF_INSN_UID (def));
+
+ bitmap_set_bit (regs, regno);
+ break;
+ }
+ }
+
+ for (df_ref ref = DF_REG_USE_CHAIN (regno);
+ ref;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ /* Debug instructions are skipped. */
+ if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
+ && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ "r%d has non convertible use in insn %d\n",
+ regno, DF_REF_INSN_UID (ref));
+
+ bitmap_set_bit (regs, regno);
+ break;
+ }
+ }
+}
+
+/* The TImode version of remove_non_convertible_regs. */
+
+static void
+timode_remove_non_convertible_regs (bitmap candidates)
+{
+ bitmap_iterator bi;
+ unsigned id;
+ bitmap regs = BITMAP_ALLOC (NULL);
+
+ EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
+ {
+ rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
+ rtx dest = SET_DEST (def_set);
+ rtx src = SET_SRC (def_set);
+
+ if ((!REG_P (dest)
+ || bitmap_bit_p (regs, REGNO (dest))
+ || HARD_REGISTER_P (dest))
+ && (!REG_P (src)
+ || bitmap_bit_p (regs, REGNO (src))
+ || HARD_REGISTER_P (src)))
+ continue;
+
+ if (REG_P (dest))
+ timode_check_non_convertible_regs (candidates, regs,
+ REGNO (dest));
+
+ if (REG_P (src))
+ timode_check_non_convertible_regs (candidates, regs,
+ REGNO (src));
+ }
+
+ EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
+ {
+ for (df_ref def = DF_REG_DEF_CHAIN (id);
+ def;
+ def = DF_REF_NEXT_REG (def))
+ if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
+ {
+ if (dump_file)
+ fprintf (dump_file, "Removing insn %d from candidates list\n",
+ DF_REF_INSN_UID (def));
+
+ bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
+ }
+
+ for (df_ref ref = DF_REG_USE_CHAIN (id);
+ ref;
+ ref = DF_REF_NEXT_REG (ref))
+ if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
+ {
+ if (dump_file)
+ fprintf (dump_file, "Removing insn %d from candidates list\n",
+ DF_REF_INSN_UID (ref));
+
+ bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
+ }
+ }
+
+ BITMAP_FREE (regs);
+}
+
+/* For a given bitmap of insn UIDs scans all instruction and
+ remove insn from CANDIDATES in case it has both convertible
+ and not convertible definitions.
+
+ All insns in a bitmap are conversion candidates according to
+ scalar_to_vector_candidate_p. Currently it implies all insns
+ are single_set. */
+
+static void
+remove_non_convertible_regs (bitmap candidates)
+{
+ if (TARGET_64BIT)
+ timode_remove_non_convertible_regs (candidates);
+ else
+ dimode_remove_non_convertible_regs (candidates);
+}
+
class scalar_chain
{
public:
scalar_chain ();
- ~scalar_chain ();
+ virtual ~scalar_chain ();
static unsigned max_id;
bitmap defs_conv;
void build (bitmap candidates, unsigned insn_uid);
- int compute_convert_gain ();
+ virtual int compute_convert_gain () = 0;
int convert ();
+ protected:
+ void add_to_queue (unsigned insn_uid);
+ void emit_conversion_insns (rtx insns, rtx_insn *pos);
+
private:
void add_insn (bitmap candidates, unsigned insn_uid);
- void add_to_queue (unsigned insn_uid);
- void mark_dual_mode_def (df_ref def);
void analyze_register_chain (bitmap candidates, df_ref ref);
+ virtual void mark_dual_mode_def (df_ref def) = 0;
+ virtual void convert_insn (rtx_insn *insn) = 0;
+ virtual void convert_registers () = 0;
+};
+
+class dimode_scalar_chain : public scalar_chain
+{
+ public:
+ int compute_convert_gain ();
+ private:
+ void mark_dual_mode_def (df_ref def);
rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
- void emit_conversion_insns (rtx insns, rtx_insn *pos);
void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
void convert_insn (rtx_insn *insn);
void convert_op (rtx *op, rtx_insn *insn);
void convert_reg (unsigned regno);
void make_vector_copies (unsigned regno);
+ void convert_registers ();
+};
+
+class timode_scalar_chain : public scalar_chain
+{
+ public:
+ /* Convert from TImode to V1TImode is always faster. */
+ int compute_convert_gain () { return 1; }
+
+ private:
+ void mark_dual_mode_def (df_ref def);
+ void convert_insn (rtx_insn *insn);
+ /* We don't convert registers to difference size. */
+ void convert_registers () {}
};
unsigned scalar_chain::max_id = 0;
bitmap_set_bit (queue, insn_uid);
}
-/* Mark register defined by DEF as requiring conversion. */
+/* For DImode conversion, mark register defined by DEF as requiring
+ conversion. */
void
-scalar_chain::mark_dual_mode_def (df_ref def)
+dimode_scalar_chain::mark_dual_mode_def (df_ref def)
{
gcc_assert (DF_REF_REG_DEF_P (def));
bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
}
+/* For TImode conversion, it is unused. */
+
+void
+timode_scalar_chain::mark_dual_mode_def (df_ref)
+{
+ gcc_unreachable ();
+}
+
/* Check REF's chain to add new insns into a queue
and find registers requiring conversion. */
/* Compute a gain for chain conversion. */
int
-scalar_chain::compute_convert_gain ()
+dimode_scalar_chain::compute_convert_gain ()
{
bitmap_iterator bi;
unsigned insn_uid;
/* Replace REG in X with a V2DI subreg of NEW_REG. */
rtx
-scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
+dimode_scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg)
{
if (x == reg)
return gen_rtx_SUBREG (V2DImode, new_reg, 0);
/* Replace REG in INSN with a V2DI subreg of NEW_REG. */
void
-scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx new_reg)
+dimode_scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn,
+ rtx reg, rtx new_reg)
{
replace_with_subreg (single_set (insn), reg, new_reg);
}
and replace its uses in a chain. */
void
-scalar_chain::make_vector_copies (unsigned regno)
+dimode_scalar_chain::make_vector_copies (unsigned regno)
{
rtx reg = regno_reg_rtx[regno];
rtx vreg = gen_reg_rtx (DImode);
in case register is used in not convertible insn. */
void
-scalar_chain::convert_reg (unsigned regno)
+dimode_scalar_chain::convert_reg (unsigned regno)
{
bool scalar_copy = bitmap_bit_p (defs_conv, regno);
rtx reg = regno_reg_rtx[regno];
registers conversion. */
void
-scalar_chain::convert_op (rtx *op, rtx_insn *insn)
+dimode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
{
*op = copy_rtx_if_shared (*op);
/* Convert INSN to vector mode. */
void
-scalar_chain::convert_insn (rtx_insn *insn)
+dimode_scalar_chain::convert_insn (rtx_insn *insn)
{
rtx def_set = single_set (insn);
rtx src = SET_SRC (def_set);
df_insn_rescan (insn);
}
+/* Convert INSN from TImode to V1T1mode. */
+
+void
+timode_scalar_chain::convert_insn (rtx_insn *insn)
+{
+ rtx def_set = single_set (insn);
+ rtx src = SET_SRC (def_set);
+ rtx dst = SET_DEST (def_set);
+
+ switch (GET_CODE (dst))
+ {
+ case REG:
+ {
+ rtx tmp = find_reg_equal_equiv_note (insn);
+ if (tmp)
+ PUT_MODE (XEXP (tmp, 0), V1TImode);
+ }
+ /* FALLTHRU */
+ case MEM:
+ PUT_MODE (dst, V1TImode);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (GET_CODE (src))
+ {
+ case REG:
+ case MEM:
+ PUT_MODE (src, V1TImode);
+ break;
+
+ case CONST_INT:
+ switch (standard_sse_constant_p (src, TImode))
+ {
+ case 1:
+ src = CONST0_RTX (GET_MODE (dst));
+ break;
+ case 2:
+ src = CONSTM1_RTX (GET_MODE (dst));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (NONDEBUG_INSN_P (insn))
+ {
+ rtx tmp = gen_reg_rtx (V1TImode);
+ /* Since there are no instructions to store standard SSE
+ constant, temporary register usage is required. */
+ emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
+ dst = tmp;
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ SET_SRC (def_set) = src;
+ SET_DEST (def_set) = dst;
+
+ /* Drop possible dead definitions. */
+ PATTERN (insn) = def_set;
+
+ INSN_CODE (insn) = -1;
+ recog_memoized (insn);
+ df_insn_rescan (insn);
+}
+
+void
+dimode_scalar_chain::convert_registers ()
+{
+ bitmap_iterator bi;
+ unsigned id;
+
+ EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
+ convert_reg (id);
+
+ EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
+ make_vector_copies (id);
+}
+
/* Convert whole chain creating required register
conversions and copies. */
if (dump_file)
fprintf (dump_file, "Converting chain #%d...\n", chain_id);
- EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi)
- convert_reg (id);
-
- EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi)
- make_vector_copies (id);
+ convert_registers ();
EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
{
while (!bitmap_empty_p (candidates))
{
unsigned uid = bitmap_first_set_bit (candidates);
- scalar_chain chain;
+ scalar_chain *chain;
+
+ if (TARGET_64BIT)
+ chain = new timode_scalar_chain;
+ else
+ chain = new dimode_scalar_chain;
/* Find instructions chain we want to convert to vector mode.
Check all uses and definitions to estimate all required
conversions. */
- chain.build (candidates, uid);
+ chain->build (candidates, uid);
- if (chain.compute_convert_gain () > 0)
- converted_insns += chain.convert ();
+ if (chain->compute_convert_gain () > 0)
+ converted_insns += chain->convert ();
else
if (dump_file)
fprintf (dump_file, "Chain #%d conversion is not profitable\n",
- chain.chain_id);
+ chain->chain_id);
+
+ delete chain;
}
if (dump_file)
bitmap_obstack_release (NULL);
df_process_deferred_rescans ();
+ /* FIXME: Since the CSE pass may change dominance info, which isn't
+ expected by the fwprop pass, call free_dominance_info to
+ invalidate dominance info. Otherwise, the fwprop pass may crash
+ when dominance info is changed. */
+ if (TARGET_64BIT)
+ free_dominance_info (CDI_DOMINATORS);
+
/* Conversion means we may have 128bit register spills/fills
which require aligned stack. */
if (converted_insns)
/* opt_pass methods: */
virtual bool gate (function *)
{
- return !TARGET_64BIT && TARGET_STV && TARGET_SSE2 && optimize > 1;
+ return TARGET_STV && TARGET_SSE2 && optimize > 1;
}
virtual unsigned int execute (function *)
1, PASS_POS_INSERT_AFTER
};
opt_pass *pass_stv = make_pass_stv (g);
- struct register_pass_info stv_info
+ struct register_pass_info stv_info_dimode
= { pass_stv, "combine",
1, PASS_POS_INSERT_AFTER
};
+ /* Run the 64-bit STV pass before the CSE pass so that CONST0_RTX and
+ CONSTM1_RTX generated by the STV pass can be CSEed. */
+ struct register_pass_info stv_info_timode
+ = { pass_stv, "cse2",
+ 1, PASS_POS_INSERT_BEFORE
+ };
ix86_option_override_internal (true, &global_options, &global_options_set);
/* This needs to be done at start up. It's convenient to do it here. */
register_pass (&insert_vzeroupper_info);
- register_pass (&stv_info);
+ register_pass (TARGET_64BIT ? &stv_info_timode : &stv_info_dimode);
}
/* Implement the TARGET_OFFLOAD_OPTIONS hook. */