r14-1902-g96c3539f2a3813 split TImode move with 2 DImode move, it's
supposed to optimize TImode in parameter/return since accoring to
psABI it's stored into 2 general registers.
But when TImode is not in parameter/return, it could create redundancy
in the PR.
The patch add a splitter to handle that.
.i.e.
(insn 10 9 14 2 (set (subreg:V2DI (reg:V4SI 98 [ <retval> ]) 0)
(vec_concat:V2DI (subreg:DI (reg:TI 101) 0)
(subreg:DI (reg:TI 101) 8)))
8442 {vec_concatv2di}
(expr_list:REG_DEAD (reg:TI 101)
gcc/ChangeLog:
PR target/121274
* config/i386/sse.md (*vec_concatv2di_0): Add a splitter
before it.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr121274.c: New test.
(cherry picked from commit
6a466839340dce3b596b3ae5ce85bd05a067ae00)
(const_string "orig")))
(set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
+;; Eliminate redundancy caused by
+;; /* Special case TImode to 128-bit vector conversions via V2DI. */
+;; in ix86_expand_vector_move
+
+(define_split
+ [(set (match_operand:V2DI 0 "register_operand")
+ (vec_concat:V2DI
+ (subreg:DI (match_operand:TI 1 "register_operand") 0)
+ (subreg:DI (match_dup 1) 8)))]
+ "TARGET_SSE2 && ix86_pre_reload_split ()"
+ [(set (match_dup 0)
+ (subreg:V2DI (match_dup 1) 0))])
+
(define_insn "*vec_concatv2di_0"
[(set (match_operand:V2DI 0 "register_operand" "=v,v ,x")
(vec_concat:V2DI
--- /dev/null
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-not "vpextrq" } } */
+/* { dg-final { scan-assembler-not "vpinsrq" } } */
+
+typedef int v16si __attribute__((vector_size(64)));
+typedef int v4si __attribute__((vector_size(16)));
+
+v4si f(v16si x)
+{
+ return __builtin_shufflevector(x, x, 0, 1, 2, 3);
+}
+
+v4si g(v16si x)
+{
+return __builtin_shufflevector(x, x, 4, 5, 6, 7);
+}
+
+v4si f1(__int128 *x)
+{
+ __int128 t = *x;
+ asm("":"+x"(t));
+ return (v4si)t;
+}