insns = BITMAP_ALLOC (NULL);
defs = BITMAP_ALLOC (NULL);
defs_conv = BITMAP_ALLOC (NULL);
+ insns_conv = BITMAP_ALLOC (NULL);
queue = NULL;
+
+ n_sse_to_integer = 0;
+ n_integer_to_sse = 0;
}
/* Free chain's data. */
BITMAP_FREE (insns);
BITMAP_FREE (defs);
BITMAP_FREE (defs_conv);
+ BITMAP_FREE (insns_conv);
bitmap_obstack_release (NULL);
}
bitmap_set_bit (queue, insn_uid);
}
-general_scalar_chain::general_scalar_chain (enum machine_mode smode_,
- enum machine_mode vmode_)
- : scalar_chain (smode_, vmode_)
-{
- insns_conv = BITMAP_ALLOC (NULL);
- n_sse_to_integer = 0;
- n_integer_to_sse = 0;
-}
-
-general_scalar_chain::~general_scalar_chain ()
-{
- BITMAP_FREE (insns_conv);
-}
-
-/* For DImode conversion, mark register defined by DEF as requiring
-   conversion. */
+/* Mark the register defined by DEF as requiring conversion. */
void
-general_scalar_chain::mark_dual_mode_def (df_ref def)
+scalar_chain::mark_dual_mode_def (df_ref def)
{
gcc_assert (DF_REF_REG_DEF_P (def));
DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
}
-/* For TImode conversion, it is unused. */
-
-void
-timode_scalar_chain::mark_dual_mode_def (df_ref)
-{
- gcc_unreachable ();
-}
-
/* Check REF's chain to add new insns into a queue
and find registers requiring conversion. */
/* Convert COMPARE to vector mode. */
rtx
-general_scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn)
+scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn)
{
rtx tmp = gen_reg_rtx (vmode);
rtx src;
emit_conversion_insns (gen_move_insn (dst, tmp), insn);
dst = gen_rtx_SUBREG (vmode, tmp, 0);
}
- else if (REG_P (dst))
+ else if (REG_P (dst) && GET_MODE (dst) == smode)
{
/* Replace the definition with a SUBREG to the definition we
use inside the chain. */
df_insn_rescan (insn);
}
+/* Compute a gain for chain conversion. */
+
+int
+timode_scalar_chain::compute_convert_gain ()
+{
+ /* Assume that if we have to move TImode values between units,
+ then transforming this chain isn't worth it. */
+ if (n_sse_to_integer || n_integer_to_sse)
+ return -1;
+
+ bitmap_iterator bi;
+ unsigned insn_uid;
+
+ /* Split ties to prefer V1TImode when not optimizing for size. */
+ int gain = optimize_size ? 0 : 1;
+
+ if (dump_file)
+ fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
+
+ EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
+ {
+ rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
+ rtx def_set = single_set (insn);
+ rtx src = SET_SRC (def_set);
+ rtx dst = SET_DEST (def_set);
+ int igain = 0;
+
+ switch (GET_CODE (src))
+ {
+ case REG:
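+ /* A TImode move needs two 64-bit integer moves, but only a
+ single SSE move once converted to V1TImode. */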
+ if (optimize_insn_for_size_p ())
+ igain = MEM_P (dst) ? COSTS_N_BYTES (6) : COSTS_N_BYTES (3);
+ else
+ igain = COSTS_N_INSNS (1);
+ break;
+
+ case MEM:
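+ /* Likewise a TImode load: two integer loads become one SSE load. */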
+ igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (7)
+ : COSTS_N_INSNS (1);
+ break;
+
+ case CONST_INT:
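+ /* Storing 0 or -1 (the standard SSE constants): two immediate
+ stores become an xor/pcmpeq plus a single SSE store. */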
+ if (MEM_P (dst)
+ && standard_sse_constant_p (src, V1TImode))
+ igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (11) : 1;
+ break;
+
+ case NOT:
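+ /* Vector NOT is synthesized as XOR with all-ones, which costs an
+ extra instruction when the result must then be stored. */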
+ if (MEM_P (dst))
+ igain = -COSTS_N_INSNS (1);
+ break;
+
+ case AND:
+ case XOR:
+ case IOR:
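+ /* Two 64-bit integer operations become one SSE operation when
+ the result stays in a register. */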
+ if (!MEM_P (dst))
+ igain = COSTS_N_INSNS (1);
+ break;
+
+ default:
+ break;
+ }
+
+ if (igain != 0 && dump_file)
+ {
+ fprintf (dump_file, " Instruction gain %d for ", igain);
+ dump_insn_slim (dump_file, insn);
+ }
+ gain += igain;
+ }
+
+ if (dump_file)
+ fprintf (dump_file, " Total gain: %d\n", gain);
+
+ return gain;
+}
+
/* Fix uses of converted REG in debug insns. */
void
}
}
+/* Convert operand OP in INSN from TImode to V1TImode. */
+
+void
+timode_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
+{
+ *op = copy_rtx_if_shared (*op);
+
+ if (REG_P (*op))
+ *op = gen_rtx_SUBREG (V1TImode, *op, 0);
+ else if (MEM_P (*op))
+ {
+ rtx tmp = gen_reg_rtx (V1TImode);
+ emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (V1TImode, tmp, 0),
+ gen_gpr_to_xmm_move_src (V1TImode, *op)),
+ insn);
+ *op = gen_rtx_SUBREG (V1TImode, tmp, 0);
+
+ if (dump_file)
+ fprintf (dump_file, " Preloading operand for insn %d into r%d\n",
+ INSN_UID (insn), REGNO (tmp));
+ }
+ else if (CONST_INT_P (*op))
+ {
+ rtx vec_cst;
+ rtx tmp = gen_rtx_SUBREG (V1TImode, gen_reg_rtx (TImode), 0);
+
+ /* Prefer all ones vector in case of -1. */
+ if (constm1_operand (*op, TImode))
+ vec_cst = CONSTM1_RTX (V1TImode);
+ else
+ {
+ rtx *v = XALLOCAVEC (rtx, 1);
+ v[0] = *op;
+ vec_cst = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec_v (1, v));
+ }
+
+ if (!standard_sse_constant_p (vec_cst, V1TImode))
+ {
+ start_sequence ();
+ vec_cst = validize_mem (force_const_mem (V1TImode, vec_cst));
+ rtx_insn *seq = get_insns ();
+ end_sequence ();
+ emit_insn_before (seq, insn);
+ }
+
+ emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
+ *op = tmp;
+ }
+ else
+ {
+ gcc_assert (SUBREG_P (*op));
+ gcc_assert (GET_MODE (*op) == vmode);
+ }
+}
+
-/* Convert INSN from TImode to V1T1mode. */
+/* Convert INSN from TImode to V1TImode. */
void
rtx def_set = single_set (insn);
rtx src = SET_SRC (def_set);
rtx dst = SET_DEST (def_set);
+ rtx tmp;
+
+ /* SSE instructions cannot compute into a memory destination, so
+ when DST is a MEM and SRC is not a plain register, the cases
+ below go through a temporary vector register. */
switch (GET_CODE (dst))
{
case REG:
- {
- rtx tmp = find_reg_equal_equiv_note (insn);
- if (tmp)
- PUT_MODE (XEXP (tmp, 0), V1TImode);
- PUT_MODE (dst, V1TImode);
- fix_debug_reg_uses (dst);
- }
+ if (GET_MODE (dst) == TImode)
+ {
+ tmp = find_reg_equal_equiv_note (insn);
+ if (tmp && GET_MODE (XEXP (tmp, 0)) == TImode)
+ PUT_MODE (XEXP (tmp, 0), V1TImode);
+ PUT_MODE (dst, V1TImode);
+ fix_debug_reg_uses (dst);
+ }
break;
case MEM:
PUT_MODE (dst, V1TImode);
{
/* Since there are no instructions to store 128-bit constant,
temporary register usage is required. */
- rtx tmp = gen_reg_rtx (V1TImode);
start_sequence ();
src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
src = validize_mem (force_const_mem (V1TImode, src));
end_sequence ();
if (seq)
emit_insn_before (seq, insn);
- emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
- dst = tmp;
+ if (MEM_P (dst))
+ {
+ tmp = gen_reg_rtx (V1TImode);
+ emit_insn_before (gen_rtx_SET (tmp, src), insn);
+ src = tmp;
+ }
}
break;
default:
gcc_unreachable ();
}
- if (NONDEBUG_INSN_P (insn))
+ if (MEM_P (dst))
+ {
+ tmp = gen_reg_rtx (V1TImode);
+ emit_insn_before (gen_rtx_SET (tmp, src), insn);
+ src = tmp;
+ }
+ break;
+
+ case AND:
+ if (GET_CODE (XEXP (src, 0)) == NOT)
+ {
+ convert_op (&XEXP (XEXP (src, 0), 0), insn);
+ convert_op (&XEXP (src, 1), insn);
+ PUT_MODE (XEXP (src, 0), V1TImode);
+ PUT_MODE (src, V1TImode);
+ break;
+ }
+ /* FALLTHRU */
+
+ case XOR:
+ case IOR:
+ convert_op (&XEXP (src, 0), insn);
+ convert_op (&XEXP (src, 1), insn);
+ PUT_MODE (src, V1TImode);
+ if (MEM_P (dst))
{
- rtx tmp = gen_reg_rtx (V1TImode);
- /* Since there are no instructions to store standard SSE
- constant, temporary register usage is required. */
- emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
- dst = tmp;
+ tmp = gen_reg_rtx (V1TImode);
+ emit_insn_before (gen_rtx_SET (tmp, src), insn);
+ src = tmp;
}
break;
+ case NOT:
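+ /* There is no vector one's-complement instruction, so rewrite
+ NOT as an XOR with an all-ones constant. */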
+ src = XEXP (src, 0);
+ convert_op (&src, insn);
+ tmp = gen_reg_rtx (V1TImode);
+ emit_insn_before (gen_move_insn (tmp, CONSTM1_RTX (V1TImode)), insn);
+ src = gen_rtx_XOR (V1TImode, src, tmp);
+ if (MEM_P (dst))
+ {
+ tmp = gen_reg_rtx (V1TImode);
+ emit_insn_before (gen_rtx_SET (tmp, src), insn);
+ src = tmp;
+ }
+ break;
+
+ case COMPARE:
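+ /* convert_compare rewrites the comparison; its result is
+ consumed through the flags register. */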
+ dst = gen_rtx_REG (CCmode, FLAGS_REG);
+ src = convert_compare (XEXP (src, 0), XEXP (src, 1), insn);
+ break;
+
default:
gcc_unreachable ();
}
return true;
}
+/* Return true if X is a TImode memory operand that is either aligned
+ or that may be accessed with cheap unaligned vector loads. */
+
+static bool
+timode_mem_p (rtx x)
+{
+ return (MEM_P (x)
+ && (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
+ || !misaligned_operand (x, TImode)));
+}
+
/* The TImode version of scalar_to_vector_candidate_p. */
static bool
rtx src = SET_SRC (def_set);
rtx dst = SET_DEST (def_set);
- /* Only TImode load and store are allowed. */
- if (GET_MODE (dst) != TImode)
+ if (GET_CODE (src) == COMPARE)
+ return convertible_comparison_p (insn, TImode);
+
+ if (GET_MODE (dst) != TImode
+ || (GET_MODE (src) != TImode
+ && !CONST_SCALAR_INT_P (src)))
return false;
- if (MEM_P (dst))
- {
- /* Check for store. Memory must be aligned or unaligned store
- is optimal. Only support store from register, standard SSE
- constant or CONST_WIDE_INT generated from piecewise store.
+ if (!REG_P (dst) && !MEM_P (dst))
+ return false;
- ??? Verify performance impact before enabling CONST_INT for
- __int128 store. */
- if (misaligned_operand (dst, TImode)
- && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
- return false;
+ if (MEM_P (dst)
+ && misaligned_operand (dst, TImode)
+ && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
+ return false;
- switch (GET_CODE (src))
- {
- default:
- return false;
+ switch (GET_CODE (src))
+ {
+ case REG:
+ case CONST_WIDE_INT:
+ return true;
- case REG:
- case CONST_WIDE_INT:
- return true;
+ case CONST_INT:
+ /* ??? Verify performance impact before enabling CONST_INT for
+ __int128 store. */
+ return standard_sse_constant_p (src, TImode);
- case CONST_INT:
- return standard_sse_constant_p (src, TImode);
- }
- }
- else if (MEM_P (src))
- {
- /* Check for load. Memory must be aligned or unaligned load is
- optimal. */
+ case MEM:
+ /* Memory must be aligned or unaligned load is optimal. */
return (REG_P (dst)
&& (!misaligned_operand (src, TImode)
|| TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
- }
- return false;
+ case AND:
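+ /* An AND of a complemented operand (the pandn form) is only
+ accepted when the destination is a register. */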
+ if (!MEM_P (dst)
+ && GET_CODE (XEXP (src, 0)) == NOT
+ && REG_P (XEXP (XEXP (src, 0), 0))
+ && (REG_P (XEXP (src, 1)) || timode_mem_p (XEXP (src, 1))))
+ return true;
+ return (REG_P (XEXP (src, 0))
+ && (REG_P (XEXP (src, 1)) || timode_mem_p (XEXP (src, 1))));
+
+ case IOR:
+ case XOR:
+ return (REG_P (XEXP (src, 0))
+ && (REG_P (XEXP (src, 1)) || timode_mem_p (XEXP (src, 1))));
+
+ case NOT:
+ return REG_P (XEXP (src, 0)) || timode_mem_p (XEXP (src, 0));
+
+ default:
+ return false;
+ }
}
/* For a register REGNO, scan instructions for its defs and uses.
-/* Registers used in both vector and sclar modes. */
+/* Registers used in both vector and scalar modes. */
bitmap defs_conv;
+ bitmap insns_conv;
+ unsigned n_sse_to_integer;
+ unsigned n_integer_to_sse;
+
void build (bitmap candidates, unsigned insn_uid);
virtual int compute_convert_gain () = 0;
int convert ();
protected:
void add_to_queue (unsigned insn_uid);
void emit_conversion_insns (rtx insns, rtx_insn *pos);
+ rtx convert_compare (rtx op1, rtx op2, rtx_insn *insn);
+ void mark_dual_mode_def (df_ref def);
private:
void add_insn (bitmap candidates, unsigned insn_uid);
void analyze_register_chain (bitmap candidates, df_ref ref);
- virtual void mark_dual_mode_def (df_ref def) = 0;
virtual void convert_insn (rtx_insn *insn) = 0;
virtual void convert_registers () = 0;
+ virtual void convert_op (rtx *op, rtx_insn *insn) = 0;
};
class general_scalar_chain : public scalar_chain
{
public:
- general_scalar_chain (enum machine_mode smode_, enum machine_mode vmode_);
- ~general_scalar_chain ();
+ general_scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
+ : scalar_chain (smode_, vmode_) {}
int compute_convert_gain () final override;
+
private:
hash_map<rtx, rtx> defs_map;
- bitmap insns_conv;
- unsigned n_sse_to_integer;
- unsigned n_integer_to_sse;
- void mark_dual_mode_def (df_ref def) final override;
void convert_insn (rtx_insn *insn) final override;
- void convert_op (rtx *op, rtx_insn *insn);
void convert_reg (rtx_insn *insn, rtx dst, rtx src);
+ void convert_op (rtx *op, rtx_insn *insn) final override;
void make_vector_copies (rtx_insn *, rtx);
void convert_registers () final override;
- rtx convert_compare (rtx op1, rtx op2, rtx_insn *insn);
int vector_const_cost (rtx exp);
};
{
public:
timode_scalar_chain () : scalar_chain (TImode, V1TImode) {}
-
- /* Convert from TImode to V1TImode is always faster. */
- int compute_convert_gain () final override { return 1; }
+ int compute_convert_gain () final override;
private:
- void mark_dual_mode_def (df_ref def) final override;
void fix_debug_reg_uses (rtx reg);
void convert_insn (rtx_insn *insn) final override;
- /* We don't convert registers to difference size. */
+ void convert_op (rtx *op, rtx_insn *insn) final override;
+ /* We don't convert registers to a different size. */
void convert_registers () final override {}
};
} // anon namespace
-bool ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined);
+bool ix86_save_reg (unsigned int regno, bool maybe_eh_return,
+ bool ignore_outlined);
int ix86_compare_version_priority (tree decl1, tree decl2);
tree ix86_generate_version_dispatcher_body (void *node_p);
tree ix86_get_function_versions_dispatcher (void *decl);