machine_mode innermode = GET_MODE_INNER (mode);
rtx reg;
- /* If that fails, force VAL into a register. */
+ /* If that fails, force VAL into a register or into memory (the constant pool). */
start_sequence ();
- reg = force_reg (innermode, val);
- if (GET_MODE (reg) != innermode)
- reg = gen_lowpart (innermode, reg);
+
+ if (!TARGET_PREFER_BCST_FROM_INTEGER && CONST_INT_P (val)
+ && GET_MODE_BITSIZE (innermode) <= HOST_BITS_PER_WIDE_INT
+ && GET_MODE_BITSIZE(mode) >= 128)
+ reg = validize_mem (force_const_mem (innermode, val));
+ else
+ {
+ reg = force_reg (innermode, val);
+ if (GET_MODE (reg) != innermode)
+ reg = gen_lowpart (innermode, reg);
+ }
+
SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg);
seq = end_sequence ();
if (seq)
ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES_FROM_VEC]
#define TARGET_INTER_UNIT_CONVERSIONS \
ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS]
+#define TARGET_PREFER_BCST_FROM_INTEGER \
+ ix86_tune_features[X86_TUNE_PREFER_BCST_FROM_INTEGER]
+
#define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT]
#define TARGET_SCHEDULE ix86_tune_features[X86_TUNE_SCHEDULE]
#define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT]
DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions",
~(m_AMDFAM10 | m_BDVER))
+/* X86_TUNE_PREFER_BCST_FROM_INTEGER: Enable broadcast from an integer
+ register for 128/256/512-bit vectors.  If disabled, the constant is
+ instead broadcast or loaded from the constant pool.
+
+ Broadcast from integer:
+ mov $0xa,%eax
+ vmovd %eax,%xmm0
+ vpbroadcastd %xmm0,%xmm0
+
+ Broadcast/load from constant pool:
+ vpbroadcastd CST.0(%rip), %xmm0 */
+
+DEF_TUNE (X86_TUNE_PREFER_BCST_FROM_INTEGER, "prefer_bcst_from_integer",
+ m_ALL)
+
/* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for
fp converts to destination register. */
DEF_TUNE (X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS, "split_mem_opnd_for_fp_converts",