bool ok = ix86_expand_vector_init_duplicate (false, vector_mode,
target,
GEN_INT (val_broadcast));
- gcc_assert (ok);
+ if (!ok)
+ return nullptr;
target = lowpart_subreg (mode, target, vector_mode);
return target;
}
&& INTEGRAL_MODE_P (mode))
return nullptr;
- unsigned int msize = GET_MODE_SIZE (mode);
- unsigned int inner_size = GET_MODE_SIZE (GET_MODE_INNER ((mode)));
-
/* Convert CONST_VECTOR to a non-standard SSE constant integer
broadcast only if vector broadcast is available. */
if (standard_sse_constant_p (op, mode))
return nullptr;
- /* vpbroadcast[b,w] is available under TARGET_AVX2.
- or TARGET_AVX512BW for zmm. */
- if (inner_size < 4 && !(msize == 64 ? TARGET_AVX512BW : TARGET_AVX2))
- return nullptr;
-
if (GET_MODE_INNER (mode) == TImode)
return nullptr;
{
/* Broadcast to XMM/YMM/ZMM register from an integer
constant or scalar mem. */
- op1 = gen_reg_rtx (mode);
- if (FLOAT_MODE_P (mode)
- || (!TARGET_64BIT && GET_MODE_INNER (mode) == DImode)
- /* vbroadcastss/vbroadcastsd only supports memory operand
- w/o AVX2, force them into memory to avoid spill to
- memory. */
- || (GET_MODE_SIZE (mode) == 32
- && (GET_MODE_INNER (mode) == DImode
- || GET_MODE_INNER (mode) == SImode)
- && !TARGET_AVX2))
+ rtx tmp = gen_reg_rtx (mode);
+ if (FLOAT_MODE_P (mode))
first = force_const_mem (GET_MODE_INNER (mode), first);
bool ok = ix86_expand_vector_init_duplicate (false, mode,
- op1, first);
- gcc_assert (ok);
- emit_move_insn (op0, op1);
- return;
+ tmp, first);
+ if (!ok && !TARGET_64BIT && GET_MODE_INNER (mode) == DImode)
+ {
+ first = force_const_mem (GET_MODE_INNER (mode), first);
+ ok = ix86_expand_vector_init_duplicate (false, mode,
+ tmp, first);
+ }
+ if (ok)
+ {
+ emit_move_insn (op0, tmp);
+ return;
+ }
}
}
switch (mode)
{
+ case E_V2DImode:
+ if (CONST_INT_P (val))
+ {
+ int tmp = (int)INTVAL (val);
+ if (tmp == (int)(INTVAL (val) >> 32))
+ {
+ rtx reg = gen_reg_rtx (V4SImode);
+ ok = ix86_vector_duplicate_value (V4SImode, reg,
+ GEN_INT (tmp));
+ if (ok)
+ {
+ emit_move_insn (target, gen_lowpart (V2DImode, reg));
+ return true;
+ }
+ }
+ }
+ return ix86_vector_duplicate_value (mode, target, val);
+
+ case E_V4DImode:
+ if (CONST_INT_P (val))
+ {
+ int tmp = (int)INTVAL (val);
+ if (tmp == (int)(INTVAL (val) >> 32))
+ {
+ rtx reg = gen_reg_rtx (V8SImode);
+ ok = ix86_vector_duplicate_value (V8SImode, reg,
+ GEN_INT (tmp));
+ if (ok)
+ {
+ emit_move_insn (target, gen_lowpart (V4DImode, reg));
+ return true;
+ }
+ }
+ }
+ return ix86_vector_duplicate_value (mode, target, val);
+
case E_V2SImode:
case E_V2SFmode:
if (!mmx_ok)
/* FALLTHRU */
case E_V4DFmode:
- case E_V4DImode:
case E_V8SFmode:
case E_V8SImode:
case E_V2DFmode:
- case E_V2DImode:
case E_V4SFmode:
case E_V4SImode:
case E_V16SImode:
rtx x;
val = gen_lowpart (SImode, val);
+ if (CONST_INT_P (val))
+ return false;
x = gen_rtx_TRUNCATE (HImode, val);
x = gen_rtx_VEC_DUPLICATE (mode, x);
emit_insn (gen_rtx_SET (target, x));
rtx x;
val = gen_lowpart (SImode, val);
+ if (CONST_INT_P (val))
+ return false;
x = gen_rtx_TRUNCATE (HImode, val);
x = gen_rtx_VEC_DUPLICATE (mode, x);
emit_insn (gen_rtx_SET (target, x));
goto widen;
case E_V8HImode:
+ if (CONST_INT_P (val))
+ goto widen;
+ /* FALLTHRU */
+
case E_V8HFmode:
case E_V8BFmode:
if (TARGET_AVX2)
goto widen;
case E_V16QImode:
+ if (CONST_INT_P (val))
+ goto widen;
if (TARGET_AVX2)
return ix86_vector_duplicate_value (mode, target, val);
val = convert_modes (wsmode, smode, val, true);
- if (smode == QImode && !TARGET_PARTIAL_REG_STALL)
+ if (CONST_INT_P (val))
+ {
+ x = simplify_binary_operation (ASHIFT, wsmode, val,
+ GEN_INT (GET_MODE_BITSIZE (smode)));
+ val = simplify_binary_operation (IOR, wsmode, val, x);
+ }
+ else if (smode == QImode && !TARGET_PARTIAL_REG_STALL)
emit_insn (gen_insv_1 (wsmode, val, val));
else
{
x = gen_reg_rtx (wvmode);
ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
- gcc_assert (ok);
+ if (!ok)
+ return false;
emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
- return ok;
+ return true;
}
case E_V16HImode:
+ case E_V32QImode:
+ if (CONST_INT_P (val))
+ goto widen;
+ /* FALLTHRU */
+
case E_V16HFmode:
case E_V16BFmode:
- case E_V32QImode:
if (TARGET_AVX2)
return ix86_vector_duplicate_value (mode, target, val);
else
rtx x = gen_reg_rtx (hvmode);
ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
- gcc_assert (ok);
+ if (!ok)
+ return false;
x = gen_rtx_VEC_CONCAT (mode, x, x);
emit_insn (gen_rtx_SET (target, x));
rtx x = gen_reg_rtx (hvmode);
ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
- gcc_assert (ok);
+ if (!ok)
+ return false;
x = gen_rtx_VEC_CONCAT (mode, x, x);
emit_insn (gen_rtx_SET (target, x));
all_same = false;
}
+ /* If all values are identical, broadcast the value. */
+ if (all_same
+ && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
+ XVECEXP (vals, 0, 0)))
+ return;
+
/* Constants are best loaded from the constant pool. */
if (n_var == 0)
{
return;
}
- /* If all values are identical, broadcast the value. */
- if (all_same
- && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
- XVECEXP (vals, 0, 0)))
- return;
-
/* Values where only one field is non-constant are best loaded from
the pool and overwritten via move later. */
if (n_var == 1)
return result;
}
-/* { dg-final { scan-rtl-dump-times "0xfffffffffefefefe" 1 "expand" } } */
+/* { dg-final { scan-rtl-dump-times "0xfffffffffefefefe" 3 "expand" } } */
/* { dg-final { scan-rtl-dump-times "\\\[0xfefefefefefefefe\\\]" 2 "expand" } } */
/* { dg-final { scan-rtl-dump-times "0xfffffffffffffffe\\\]\\\) repeated x16" 2 "expand" } } */
/* { dg-options "-O2 -mavx512f -mavx512dq" } */
/* { dg-additional-options "-fno-PIE" { target ia32 } } */
/* { dg-additional-options "-mdynamic-no-pic" { target { *-*-darwin* && ia32 } } }
-/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to8\\\}" 2 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to8\\\}" 5 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to8\\\}" 2 } } */
/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to16\\\}" 2 } } */
/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %zmm\[0-9\]+" 3 } } */
/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%r\[^\n\]*, %zmm\[0-9\]+" 3 { target { ! ia32 } } } } */
/* { dg-options "-O2 -mavx512f" } */
/* { dg-additional-options "-fno-PIE" { target ia32 } } */
/* { dg-additional-options "-mdynamic-no-pic" { target { *-*-darwin* && ia32 } } }
-/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to8\\\}" 4 { target ia32 } } } */
+/* { dg-final { scan-assembler-not "\[^\n\]*\\\{1to8\\\}" { target ia32 } } } */
/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %zmm\[0-9\]+" 4 } } */
/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%r\[^\n\]*, %zmm\[0-9\]+" 4 { target { ! ia32 } } } } */
return _mm256_abs_ph (a);
}
-/* { dg-final { scan-assembler-times "vpbroadcastq\[^\n\]*%ymm\[0-9\]+" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%ymm\[0-9\]+" 1 { target { ! ia32 } } } } */
/* { dg-final { scan-assembler-times "vpand\[^\n\]*%ymm\[0-9\]+" 1 } } */
__m128h
return _mm_abs_ph (a);
}
-/* { dg-final { scan-assembler-times "vpbroadcastq\[^\n\]*%xmm\[0-9\]+" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%xmm\[0-9\]+" 1 { target { ! ia32 } } } } */
/* { dg-final { scan-assembler-times "vpand\[^\n\]*%xmm\[0-9\]+" 1 } } */
/* { dg-options "-O2 -mavx512f -mavx512vl -mavx512dq" } */
/* { dg-additional-options "-fno-PIE" { target ia32 } } */
/* { dg-additional-options "-mdynamic-no-pic" { target { *-*-darwin* && ia32 } } }
-/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to2\\\}" 2 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to4\\\}" 4 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to2\\\}" 5 { target ia32 } } } */
-/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to4\\\}" 7 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to2\\\}" 2 } } */
+/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to4\\\}" 4 } } */
/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to8\\\}" 2 } } */
/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 3 } } */
/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 3 } } */
/* { dg-options "-O2 -mavx512f -mavx512vl" } */
/* { dg-additional-options "-fno-PIE" { target ia32 } } */
/* { dg-additional-options "-mdynamic-no-pic" { target { *-*-darwin* && ia32 } } }
-/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to2\\\}" 4 { target ia32 } } } */
-/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to4\\\}" 4 { target ia32 } } } */
+/* { dg-final { scan-assembler-not "\[^\n\]*\\\{1to2\\\}" { target ia32 } } } */
+/* { dg-final { scan-assembler-not "\[^\n\]*\\\{1to4\\\}" { target ia32 } } } */
/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 4 } } */
/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 4 } } */
/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%r\[^\n\]*, %xmm\[0-9\]+" 4 { target { ! ia32 } } } } */
__builtin_memset (dst, 3, 16);
}
-/* { dg-final { scan-assembler-times "movdqa\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movd\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "pshufd" 1 } } */
/* { dg-final { scan-assembler-times "movups\[\\t \]%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
array[i] = MK_CONST128_BROADCAST (0x1f);
}
-/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 8 } } */
#include "pr100865-10a.c"
-/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%ymm\[0-9\]+, " 8 } } */
__builtin_memset (dst, 3, 16);
}
-/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%xmm\[0-9\]+, %xmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, %xmm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
/* { dg-final { scan-assembler-not "vmovdqa" } } */
__builtin_memset (dst, 3, 16);
}
-/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
-/* { dg-final { scan-assembler-not "vpbroadcastb\[\\t \]+%xmm\[0-9\]+, %xmm\[0-9\]+" } } */
+/* { dg-final { scan-assembler-not "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, %xmm\[0-9\]+" } } */
/* { dg-final { scan-assembler-not "vmovdqa" } } */
array[i] = -45;
}
-/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 2 } } */
/* { dg-final { scan-assembler-not "vmovdqa" } } */
#include "pr100865-4a.c"
-/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%ymm\[0-9\]+, " 2 } } */
/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
-/* { dg-final { scan-assembler-not "vpbroadcastb\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" } } */
+/* { dg-final { scan-assembler-not "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" } } */
/* { dg-final { scan-assembler-not "vmovdqa" } } */
array[i] = -45;
}
-/* { dg-final { scan-assembler-times "vpbroadcastw\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 4 } } */
/* { dg-final { scan-assembler-not "vmovdqa" } } */
#include "pr100865-5a.c"
-/* { dg-final { scan-assembler-times "vpbroadcastw\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu16\[\\t \]%ymm\[0-9\]+, " 4 } } */
-/* { dg-final { scan-assembler-not "vpbroadcastw\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" } } */
+/* { dg-final { scan-assembler-not "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" } } */
/* { dg-final { scan-assembler-not "vmovdqa" } } */
array[i] = MK_CONST128_BROADCAST (0x1fff);
}
-/* { dg-final { scan-assembler-times "vpbroadcastw\[\\t \]+%xmm\[0-9\]+, %xmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, %xmm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa\[\\t \]%xmm\[0-9\]+, " 16 } } */
#include "pr100865-9a.c"
-/* { dg-final { scan-assembler-times "vpbroadcastw\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa\[\\t \]%xmm\[0-9\]+, " 16 } } */
return _mm256_set1_epi16 (12);
}
-/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%r\[^\n\]*, %ymm\[0-9\]+" 1 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%\[^\n\]*, %ymm\[0-9\]+" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1 { target ia32 } } } */
/* { dg-final { scan-assembler-not "vmovdqa" } } */
/* { dg-final { scan-assembler-not "vzeroupper" } } */
__builtin_memset (dst, 12, 19);
}
-/* { dg-final { scan-assembler-times "vpbroadcastb" 1 } } */
+/* { dg-final { scan-assembler-times "vpbroadcastd" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
/* { dg-final { scan-assembler-times "vmovd\[\\t \]+%xmm\[0-9\]+, 16\\(%\[\^,\]+\\)" 1 { xfail *-*-* } } } */