switch (mode)
{
case E_V2SFmode:
- if (TARGET_SSE4_1)
+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
gen = gen_mmx_blendvps;
break;
case E_V4SFmode:
- if (TARGET_SSE4_1)
+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
gen = gen_sse4_1_blendvps;
break;
case E_V2DFmode:
- if (TARGET_SSE4_1)
+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
gen = gen_sse4_1_blendvpd;
break;
case E_SFmode:
- if (TARGET_SSE4_1)
+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
gen = gen_sse4_1_blendvss;
break;
case E_DFmode:
- if (TARGET_SSE4_1)
+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
gen = gen_sse4_1_blendvsd;
break;
case E_V8QImode:
case E_V4HFmode:
case E_V4BFmode:
case E_V2SImode:
- if (TARGET_SSE4_1)
+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
{
gen = gen_mmx_pblendvb_v8qi;
blend_mode = V8QImode;
case E_V2HImode:
case E_V2HFmode:
case E_V2BFmode:
- if (TARGET_SSE4_1)
+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
{
gen = gen_mmx_pblendvb_v4qi;
blend_mode = V4QImode;
}
break;
case E_V2QImode:
- if (TARGET_SSE4_1)
+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
gen = gen_mmx_pblendvb_v2qi;
break;
case E_V16QImode:
case E_V4SImode:
case E_V2DImode:
case E_V1TImode:
- if (TARGET_SSE4_1)
+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
{
gen = gen_sse4_1_pblendvb;
blend_mode = V16QImode;
}
break;
case E_V8SFmode:
- if (TARGET_AVX)
+ if (TARGET_AVX && TARGET_SSE_MOVCC_USE_BLENDV)
gen = gen_avx_blendvps256;
break;
case E_V4DFmode:
- if (TARGET_AVX)
+ if (TARGET_AVX && TARGET_SSE_MOVCC_USE_BLENDV)
gen = gen_avx_blendvpd256;
break;
case E_V32QImode:
case E_V16BFmode:
case E_V8SImode:
case E_V4DImode:
- if (TARGET_AVX2)
+ if (TARGET_AVX2 && TARGET_SSE_MOVCC_USE_BLENDV)
{
gen = gen_avx2_pblendvb;
blend_mode = V32QImode;
ix86_tune_features[X86_TUNE_DEST_FALSE_DEP_FOR_GLC]
#define TARGET_SLOW_STC ix86_tune_features[X86_TUNE_SLOW_STC]
#define TARGET_USE_RCR ix86_tune_features[X86_TUNE_USE_RCR]
+#define TARGET_SSE_MOVCC_USE_BLENDV \
+ ix86_tune_features[X86_TUNE_SSE_MOVCC_USE_BLENDV]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD,
"v2df_reduction_prefer_haddpd", m_NONE)
+/* X86_TUNE_SSE_MOVCC_USE_BLENDV: Prefer blendv instructions to the
+ 3-instruction sequence (op1 & mask) | (op2 & ~mask)
+ for vector conditional moves.
+ On Crestmont, the 4-operand VEX-encoded blendv instructions are
+ decoded from MSROM, which is slow. */
+DEF_TUNE (X86_TUNE_SSE_MOVCC_USE_BLENDV,
+ "sse_movcc_use_blendv", ~m_CORE_ATOM)
+
/*****************************************************************************/
/* AVX instruction selection tuning (some of SSE flags affects AVX, too) */
/*****************************************************************************/
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=sierraforest -O2" } */
+/* Verify that no (v)blendv instruction is emitted for a vectorized
+   conditional move: on Sierra Forest (Crestmont cores) the 4-operand
+   blendv instructions are decoded from MSROM and are slow, so the
+   and/andnot/or expansion should be used instead — TODO confirm the
+   expected expansion against the i386-expand.cc change.  */
+/* { dg-final { scan-assembler-not {(?n)vp?blendv(b|ps|pd)} } } */
+
+/* Vectorizable loop whose body is a conditional +1/-1 update; the
+   vectorizer turns the ternary into a vector cmov.  */
+void
+foo (int* a, int* b, int* __restrict c)
+{
+ for (int i = 0; i != 200; i++)
+ {
+ c[i] += a[i] > b[i] ? 1 : -1;
+ }
+}