case E_V8HFmode:
case E_V4SImode:
case E_V2DImode:
+ if (TARGET_SSE_REDUCTION_PREFER_PSHUF)
+ {
+ if (i == 128)
+ {
+ d = gen_reg_rtx (V4SImode);
+ tem = gen_sse2_pshufd_1 (
+ d, force_reg (V4SImode, gen_lowpart (V4SImode, src)),
+ GEN_INT (2), GEN_INT (3), GEN_INT (2), GEN_INT (3));
+ break;
+ }
+ else if (i == 64)
+ {
+ d = gen_reg_rtx (V4SImode);
+ tem = gen_sse2_pshufd_1 (
+ d, force_reg (V4SImode, gen_lowpart (V4SImode, src)),
+ GEN_INT (1), GEN_INT (1), GEN_INT (1), GEN_INT (1));
+ break;
+ }
+ else if (i == 32)
+ {
+ d = gen_reg_rtx (V8HImode);
+ tem = gen_sse2_pshuflw_1 (
+ d, force_reg (V8HImode, gen_lowpart (V8HImode, src)),
+ GEN_INT (1), GEN_INT (1), GEN_INT (1), GEN_INT (1));
+ break;
+ }
+ }
d = gen_reg_rtx (V1TImode);
tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
GEN_INT (i / 2));
#define TARGET_SSE_MOVCC_USE_BLENDV \
ix86_tune_features[X86_TUNE_SSE_MOVCC_USE_BLENDV]
#define TARGET_ALIGN_TIGHT_LOOPS \
- ix86_tune_features[X86_TUNE_ALIGN_TIGHT_LOOPS]
+ ix86_tune_features[X86_TUNE_ALIGN_TIGHT_LOOPS]
+#define TARGET_SSE_REDUCTION_PREFER_PSHUF \
+ ix86_tune_features[X86_TUNE_SSE_REDUCTION_PREFER_PSHUF] /* When set, the
+   vector-reduction code guarded by this macro emits pshufd/pshuflw instead
+   of a V1TI logical right shift.  */
/* Feature tests against the various architecture variations. */
DEF_TUNE (X86_TUNE_SSE_MOVCC_USE_BLENDV,
"sse_movcc_use_blendv", ~m_CORE_ATOM)
+/* X86_TUNE_SSE_REDUCTION_PREFER_PSHUF: Prefer pshufd/pshuflw over a V1TI
+   logical right shift when reducing V16QI, V8HI, V8HF, V4SI and V2DI
+   vectors, for targets where the shift is costlier.  */
+DEF_TUNE (X86_TUNE_SSE_REDUCTION_PREFER_PSHUF,
+ "sse_reduction_prefer_pshuf", m_ZNVER4 | m_ZNVER5)
+
/*****************************************************************************/
/* AVX instruction selection tuning (some of SSE flags affects AVX, too) */
/*****************************************************************************/
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=znver5 " } */
+
+#define N 32 /* Number of elements summed by foo.  */
+#define T short /* Element and accumulator type.  */
+T /* Add up a[0] .. a[N-1] in type T and return the total.  The dg-final
+     checks in this file expect the reduction to use vpshuf, not vpsrl.  */
+foo (T *a)
+{
+ T sum = 0;
+ for (int i = 0; i < N; i++)
+ sum += a[i];
+ return sum;
+}
+
+/* { dg-final { scan-assembler-times "vpsrl" 0 } } */
+/* { dg-final { scan-assembler-times "vpshuf" 3 } } */