emit_insn (gen_xorv4si3 (value, value, large));
}
-static bool ix86_expand_vector_init_one_nonzero (bool mmx_ok,
- machine_mode mode, rtx target,
- rtx var, int one_var);
-
/* Convert an unsigned DImode value into a DFmode, using only SSE.
Expects the 64-bit DImode to be supplied in a pair of integral
registers. Requires SSE2; will use SSE3 if available. For x86_32,
whose ONE_VAR element is VAR, and other elements are zero. Return true
if successful. */
-static bool
+bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
rtx target, rtx var, int one_var)
{
return target;
}
+/* Trunc a vector to a narrow vector, like v4di -> v4si. */
+
+void
+ix86_expand_trunc_with_avx2_noavx512f (rtx output, rtx input, machine_mode cvt_mode)
+{
+ machine_mode out_mode = GET_MODE (output);
+ machine_mode in_mode = GET_MODE (input);
+ int len = GET_MODE_SIZE (in_mode);
+ gcc_assert (len == GET_MODE_SIZE (cvt_mode)
+ && GET_MODE_INNER (out_mode) == GET_MODE_INNER (cvt_mode)
+ && (REG_P (input) || SUBREG_P (input)));
+ scalar_mode inner_out_mode = GET_MODE_INNER (out_mode);
+ int in_innersize = GET_MODE_SIZE (GET_MODE_INNER (in_mode));
+ int out_innersize = GET_MODE_SIZE (inner_out_mode);
+
+ struct expand_vec_perm_d d;
+ d.target = gen_reg_rtx (cvt_mode);
+ d.op0 = lowpart_subreg (cvt_mode, force_reg(in_mode, input), in_mode);
+ d.op1 = d.op0;
+ d.vmode = cvt_mode;
+ d.nelt = GET_MODE_NUNITS (cvt_mode);
+ d.testing_p = false;
+ d.one_operand_p = true;
+
+ /* Init perm. Put the needed bits of input in order and
+ fill the rest of bits by default. */
+ for (int i = 0; i < d.nelt; ++i)
+ {
+ d.perm[i] = i;
+ if (i < GET_MODE_NUNITS (out_mode))
+ d.perm[i] = i * (in_innersize / out_innersize);
+ }
+
+ bool ok = ix86_expand_vec_perm_const_1(&d);
+ gcc_assert (ok);
+ emit_move_insn (output, gen_lowpart (out_mode, d.target));
+}
+
#include "gt-i386-expand.h"
extern rtx ix86_gen_ccmp_next (rtx_insn **, rtx_insn **, rtx,
enum rtx_code, tree, tree, enum rtx_code);
extern int ix86_get_flags_cc (enum rtx_code);
+extern void ix86_expand_trunc_with_avx2_noavx512f (rtx, rtx, machine_mode);
extern rtx ix86_memtag_untagged_pointer (rtx, rtx);
extern bool ix86_memtag_can_tag_addresses (void);
extern void ix86_expand_sse2_abs (rtx, rtx);
extern bool ix86_expand_vector_init_duplicate (bool, machine_mode, rtx,
rtx);
+extern bool ix86_expand_vector_init_one_nonzero (bool, machine_mode, rtx,
+ rtx, int);
extern bool ix86_extract_perm_from_pool_constant (int*, rtx);
/* In i386-c.cc */
(define_mode_attr mmxhalfmode
[(V4HI "V4QI") (V2HI "V2QI")])
+(define_mode_attr mmxbytemode
+ [(V4HI "V8QI") (V2HI "V4QI")])
+
(define_mode_attr mmxhalfmodelower
[(V4HI "v4qi") (V2HI "v2qi")])
DONE;
})
-(define_insn "trunc<mode><mmxhalfmodelower>2"
+(define_expand "trunc<mode><mmxhalfmodelower>2"
+ [(set (match_operand:<mmxhalfmode> 0 "register_operand")
+ (truncate:<mmxhalfmode>
+ (match_operand:VI2_32_64 1 "register_operand")))]
+ "TARGET_AVX2"
+{
+ if (TARGET_AVX512VL && TARGET_AVX512BW)
+ emit_insn (gen_avx512vl_trunc<mode><mmxhalfmodelower>2 (operands[0], operands[1]));
+ else
+ ix86_expand_trunc_with_avx2_noavx512f (operands[0], operands[1], <mmxbytemode>mode);
+ DONE;
+})
+
+(define_insn "avx512vl_trunc<mode><mmxhalfmodelower>2"
[(set (match_operand:<mmxhalfmode> 0 "register_operand" "=v")
(truncate:<mmxhalfmode>
(match_operand:VI2_32_64 1 "register_operand" "v")))]
(set_attr "mode" "TI")])
(define_mode_iterator V2QI_V2HI [V2QI V2HI])
-(define_insn "truncv2si<mode>2"
+(define_mode_attr v2qi_quad_v2hi_double
+ [(V2QI "V8QI") (V2HI "V4HI")])
+(define_expand "truncv2si<mode>2"
+ [(set (match_operand:V2QI_V2HI 0 "register_operand")
+ (truncate:V2QI_V2HI
+ (match_operand:V2SI 1 "register_operand")))]
+ "TARGET_AVX2 && TARGET_MMX_WITH_SSE"
+{
+ if (TARGET_AVX512VL)
+ emit_insn (gen_avx512vl_truncv2si<mode>2 (operands[0], operands[1]));
+ else
+ ix86_expand_trunc_with_avx2_noavx512f (operands[0], operands[1], <v2qi_quad_v2hi_double>mode);
+ DONE;
+})
+
+(define_insn "avx512vl_truncv2si<mode>2"
[(set (match_operand:V2QI_V2HI 0 "register_operand" "=v")
(truncate:V2QI_V2HI
(match_operand:V2SI 1 "register_operand" "v")))]
(define_mode_attr ssebytemode
[(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")
- (V16SI "V64QI") (V8SI "V32QI") (V4SI "V16QI")])
+ (V16SI "V64QI") (V8SI "V32QI") (V4SI "V16QI")
+ (V8HI "V16QI")])
(define_mode_attr sseintconvert
[(V32HI "w") (V16HI "w") (V8HI "w")
(define_mode_iterator PMOV_DST_MODE_2
[V4SI V8HI (V16QI "TARGET_AVX512BW")])
+(define_mode_iterator PMOV_DST_MODE_2_AVX2
+ [V4SI V8HI V16QI])
(define_mode_attr pmov_suff_2
[(V16QI "wb") (V8HI "dw") (V4SI "qd")])
(define_expand "trunc<ssedoublemodelower><mode>2"
- [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
- (truncate:PMOV_DST_MODE_2
+ [(set (match_operand:PMOV_DST_MODE_2_AVX2 0 "nonimmediate_operand")
+ (truncate:PMOV_DST_MODE_2_AVX2
(match_operand:<ssedoublemode> 1 "register_operand")))]
- "TARGET_AVX512VL")
+ "TARGET_AVX2"
+{
+ if (!TARGET_AVX512VL
+ || (<MODE>mode == V16QImode && !TARGET_AVX512BW))
+ {
+ ix86_expand_trunc_with_avx2_noavx512f (operands[0],
+ operands[1],
+ <ssedoublevecmode>mode);
+ DONE;
+ }
+})
(define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
[(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
"TARGET_AVX512VL")
(define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
+(define_mode_iterator PMOV_SRC_MODE_3_AVX2 [V4DI V2DI V8SI V4SI V8HI])
(define_mode_attr pmov_dst_3_lower
[(V4DI "v4qi") (V2DI "v2qi") (V8SI "v8qi") (V4SI "v4qi") (V8HI "v8qi")])
(define_mode_attr pmov_dst_3
(define_expand "trunc<mode><pmov_dst_3_lower>2"
[(set (match_operand:<pmov_dst_3> 0 "register_operand")
(truncate:<pmov_dst_3>
- (match_operand:PMOV_SRC_MODE_3 1 "register_operand")))]
- "TARGET_AVX512VL"
+ (match_operand:PMOV_SRC_MODE_3_AVX2 1 "register_operand")))]
+ "TARGET_AVX2"
{
- rtx op0 = gen_reg_rtx (V16QImode);
+ if (TARGET_AVX512VL
+ && (<MODE>mode != V8HImode || TARGET_AVX512BW))
+ {
+ rtx op0 = gen_reg_rtx (V16QImode);
- emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>qi2
- (op0, operands[1], CONST0_RTX (<pmov_dst_zeroed_3>mode)));
+ emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>qi2
+ (op0, operands[1], CONST0_RTX (<pmov_dst_zeroed_3>mode)));
- emit_move_insn (operands[0],
- lowpart_subreg (<pmov_dst_3>mode, op0, V16QImode));
+ emit_move_insn (operands[0],
+ lowpart_subreg (<pmov_dst_3>mode, op0, V16QImode));
+ }
+ else
+ {
+ ix86_expand_trunc_with_avx2_noavx512f (operands[0],
+ operands[1],
+ <ssebytemode>mode);
+ }
DONE;
})
[(set (match_operand:<pmov_dst_4> 0 "register_operand")
(truncate:<pmov_dst_4>
(match_operand:PMOV_SRC_MODE_4 1 "register_operand")))]
- "TARGET_AVX512VL"
+ "TARGET_AVX2"
{
- rtx op0 = gen_reg_rtx (V8HImode);
+ if (TARGET_AVX512VL)
+ {
+ rtx op0 = gen_reg_rtx (V8HImode);
- emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>hi2
- (op0, operands[1], CONST0_RTX (<pmov_dst_zeroed_4>mode)));
+ emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>hi2
+ (op0, operands[1], CONST0_RTX (<pmov_dst_zeroed_4>mode)));
- emit_move_insn (operands[0],
- lowpart_subreg (<pmov_dst_4>mode, op0, V8HImode));
+ emit_move_insn (operands[0],
+ lowpart_subreg (<pmov_dst_4>mode, op0, V8HImode));
+ DONE;
+ }
+ else
+ ix86_expand_trunc_with_avx2_noavx512f (operands[0], operands[1], <ssewvecmode>mode);
DONE;
})
[(set (match_operand:V2SI 0 "register_operand")
(truncate:V2SI
(match_operand:V2DI 1 "register_operand")))]
- "TARGET_AVX512VL"
+ "TARGET_AVX2"
{
- rtx op0 = gen_reg_rtx (V4SImode);
+ if (TARGET_AVX512VL)
+ {
+ rtx op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_avx512vl_truncatev2div2si2
- (op0, operands[1], CONST0_RTX (V2SImode)));
+ emit_insn (gen_avx512vl_truncatev2div2si2
+ (op0, operands[1], CONST0_RTX (V2SImode)));
- emit_move_insn (operands[0],
- lowpart_subreg (V2SImode, op0, V4SImode));
+ emit_move_insn (operands[0],
+ lowpart_subreg (V2SImode, op0, V4SImode));
+ }
+ else
+ {
+ rtx tmp = lowpart_subreg (V4SImode,
+ force_reg (V2DImode, operands[1]), V2DImode);
+ rtx op0 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse_shufps_v4si (op0, tmp, tmp, const0_rtx, GEN_INT (2),
+ GEN_INT (6), GEN_INT (7)));
+ emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
+ }
DONE;
})
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v3 -O2" } */
+/* { dg-final { scan-assembler-times "vshufps" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufb" 18 } } */
+/* { dg-final { scan-assembler-times "vpermd" 1 } } */
+/* { dg-final { scan-assembler-times "vpermq" 5 } } */
+/* { dg-final { scan-assembler-times "vpshuflw" 1 { target { ! ia32 } } } } */
+
+#include <x86intrin.h>
+
+typedef short __v2hi __attribute__ ((__vector_size__ (4)));
+typedef char __v2qi __attribute__ ((__vector_size__ (2)));
+typedef char __v4qi __attribute__ ((__vector_size__ (4)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+
+__v2si mm_cvtepi64_epi32_builtin_convertvector(__v2di a)
+{
+ return __builtin_convertvector((__v2di)a, __v2si);
+}
+
+__v4si mm256_cvtepi64_epi32_builtin_convertvector(__v4di a)
+{
+ return __builtin_convertvector((__v4di)a, __v4si);
+}
+
+__v2hi mm_cvtepi64_epi16_builtin_convertvector(__m128i a)
+{
+ return __builtin_convertvector((__v2di)a, __v2hi);
+}
+
+__v4hi mm256_cvtepi64_epi16_builtin_convertvector(__m256i a)
+{
+ return __builtin_convertvector((__v4di)a, __v4hi);
+}
+
+__v2hi mm64_cvtepi32_epi16_builtin_convertvector(__v2si a)
+{
+ return __builtin_convertvector((__v2si)a, __v2hi);
+}
+
+__v4hi mm_cvtepi32_epi16_builtin_convertvector(__m128i a)
+{
+ return __builtin_convertvector((__v4si)a, __v4hi);
+}
+
+__v8hi mm256_cvtepi32_epi16_builtin_convertvector(__v8si a)
+{
+ return __builtin_convertvector((__v8si)a, __v8hi);
+}
+
+__v2qi mm_cvtepi64_epi8_builtin_convertvector(__m128i a)
+{
+ return __builtin_convertvector((__v2di)a, __v2qi);
+}
+
+__v4qi mm256_cvtepi64_epi8_builtin_convertvector(__m256i a)
+{
+ return __builtin_convertvector((__v4di)a, __v4qi);
+}
+
+__v2qi mm64_cvtepi32_epi8_builtin_convertvector(__v2si a)
+{
+ return __builtin_convertvector((__v2si)a, __v2qi);
+}
+
+__v4qi mm_cvtepi32_epi8_builtin_convertvector(__m128i a)
+{
+ return __builtin_convertvector((__v4si)a, __v4qi);
+}
+
+__v8qi mm256_cvtepi32_epi8_builtin_convertvector(__m256i a)
+{
+ return __builtin_convertvector((__v8si)a, __v8qi);
+}
+
+__v2qi mm32_cvtepi16_epi8_builtin_convertvector(__v2hi a)
+{
+ return __builtin_convertvector((__v2hi)a, __v2qi);
+}
+
+__v4qi mm64_cvtepi16_epi8_builtin_convertvector(__v4hi a)
+{
+ return __builtin_convertvector((__v4hi)a, __v4qi);
+}
+
+__v8qi mm_cvtepi16_epi8_builtin_convertvector(__m128i a)
+{
+ return __builtin_convertvector((__v8hi)a, __v8qi);
+}
+
+__v16qi mm256_cvtepi16_epi8_builtin_convertvector(__v16hi a)
+{
+ return __builtin_convertvector((__v16hi)a, __v16qi);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-march=x86-64-v3 -O2 -flax-vector-conversions" } */
+#include <x86intrin.h>
+
+#include "avx-check.h"
+
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+typedef short __v2hi __attribute__ ((__vector_size__ (4)));
+typedef char __v2qi __attribute__ ((__vector_size__ (2)));
+typedef char __v4qi __attribute__ ((__vector_size__ (4)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+
+typedef union
+{
+ __v2si x;
+ int a[2];
+} union64i_d;
+
+typedef union
+{
+ __v2hi x;
+ short a[2];
+} union32i_w;
+
+typedef union
+{
+ __v4hi x;
+ short a[4];
+} union64i_w;
+
+typedef union
+{
+ __v2qi x;
+ char a[2];
+} union16i_b;
+
+typedef union
+{
+ __v4qi x;
+ char a[4];
+} union32i_b;
+
+typedef union
+{
+ __v8qi x;
+ char a[8];
+} union64i_b;
+
+#define CHECK_EXP_LESS128(UNION_TYPE, VALUE_TYPE, FMT) \
+static int \
+__attribute__((noinline, unused)) \
+check_##UNION_TYPE (UNION_TYPE u, const VALUE_TYPE * v) \
+{ \
+ int i; \
+ int err = 0; \
+ \
+ for (i = 0; i < ARRAY_SIZE (u.a); i++) \
+ if (u.a[i] != v[i]) \
+ { \
+ err++; \
+ PRINTF ("%i: " FMT " != " FMT "\n", \
+ i, v[i], u.a[i]); \
+ } \
+ return err; \
+}
+
+CHECK_EXP_LESS128 (union64i_d, int, "%d");
+CHECK_EXP_LESS128 (union32i_w, short, "%d");
+CHECK_EXP_LESS128 (union64i_w, short, "%d");
+CHECK_EXP_LESS128 (union16i_b, char, "%d");
+CHECK_EXP_LESS128 (union32i_b, char, "%d");
+CHECK_EXP_LESS128 (union64i_b, char, "%d");
+
+#define SUBTEST(INPUT_TYPE, OUTPUT_TYPE, OUTPUT_INNER, INIT_TYPE, CVT_TYPE) \
+void do_test##INIT_TYPE##CVT_TYPE () \
+{ \
+ INPUT_TYPE s; \
+ OUTPUT_TYPE r, ref; \
+ for (int i = 0; i < ARRAY_SIZE (s.a); i++) \
+ { \
+ s.a[i] = (i + 23415) * (i + 341); \
+ ref.a[i] = (OUTPUT_INNER) s.a[i]; \
+ } \
+ r.x = __builtin_convertvector((INIT_TYPE)s.x, CVT_TYPE); \
+ \
+ if (check_##OUTPUT_TYPE (r, ref.a)) \
+ abort (); \
+ return; \
+}
+
+SUBTEST(union128i_q, union64i_d, int, __v2di, __v2si);
+SUBTEST(union256i_q, union128i_d, int, __v4di, __v4si);
+SUBTEST(union128i_q, union32i_w, short, __v2di, __v2hi);
+SUBTEST(union256i_q, union64i_w, short, __v4di, __v4hi);
+SUBTEST(union64i_d, union32i_w, short, __v2si, __v2hi);
+SUBTEST(union128i_d, union64i_w, short, __v4si, __v4hi);
+SUBTEST(union256i_d, union128i_w, short, __v8si, __v8hi);
+SUBTEST(union128i_q, union16i_b, char, __v2di, __v2qi);
+SUBTEST(union256i_q, union32i_b, char, __v4di,__v4qi);
+SUBTEST(union64i_d, union16i_b, char, __v2si, __v2qi);
+SUBTEST(union128i_d, union32i_b, char, __v4si, __v4qi);
+SUBTEST(union256i_d, union64i_b, char, __v8si, __v8qi);
+SUBTEST(union32i_w, union16i_b, char, __v2hi, __v2qi);
+SUBTEST(union64i_w, union32i_b, char, __v4hi, __v4qi);
+SUBTEST(union128i_w, union64i_b, char, __v8hi, __v8qi);
+SUBTEST(union256i_w, union128i_b, char, __v16hi, __v16qi);
+
+void TEST (void)
+{
+ do_test__v2di__v2si ();
+ do_test__v2di__v2hi ();
+ do_test__v2di__v2qi ();
+ do_test__v4di__v4si ();
+ do_test__v4di__v4hi ();
+ do_test__v4di__v4qi ();
+ do_test__v2si__v2hi ();
+ do_test__v2si__v2qi ();
+ do_test__v4si__v4hi ();
+ do_test__v4si__v4qi ();
+ do_test__v8si__v8hi ();
+ do_test__v8si__v8qi ();
+ do_test__v2hi__v2qi ();
+ do_test__v4hi__v4qi ();
+ do_test__v8hi__v8qi ();
+ do_test__v16hi__v16qi ();
+}
a uniform CTOR with a vector promotion to a CTOR on a promoted
element. */
/* { dg-final { scan-tree-dump-times "\\(vector\\(16\\) short unsigned int\\)" 2 "optimized" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "VEC_PACK_TRUNC" 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 2 "optimized" } } */