wi::to_wide (@ipos) + isize))
(BIT_FIELD_REF @0 @rsize @rpos)))))
+/* Simplify vector inserts of other vector extracts to a permute.
+   The value inserted into vector @0 at position @ipos is the result of a
+   BIT_FIELD_REF reading position @rpos of vector @1; when @0 and @1 have
+   matching types and the extracted value has the element type, the pair
+   is rewritten as a two-input vec_perm of @0 and @1.
+   Both positions are converted to element indices with exact_div below,
+   so the pattern must only match when they are multiples of the element
+   size; otherwise an extract that straddles elements would make
+   exact_div assert instead of simply not simplifying.  */
+(simplify
+ (bit_insert @0 (BIT_FIELD_REF@2 @1 @rsize @rpos) @ipos)
+ (if (VECTOR_TYPE_P (type)
+      && types_match (@0, @1)
+      && types_match (TREE_TYPE (TREE_TYPE (@0)), TREE_TYPE (@2))
+      && TYPE_VECTOR_SUBPARTS (type).is_constant ()
+      && multiple_p (wi::to_poly_offset (@rpos),
+                     wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (type))))
+      && multiple_p (wi::to_poly_offset (@ipos),
+                     wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (type)))))
+  (with
+   {
+     /* Size of one vector element, used to turn the extract and insert
+        positions into element indices.  */
+     unsigned HOST_WIDE_INT elsz
+       = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (TREE_TYPE (@1))));
+     poly_uint64 relt = exact_div (tree_to_poly_uint64 (@rpos), elsz);
+     poly_uint64 ielt = exact_div (tree_to_poly_uint64 (@ipos), elsz);
+     unsigned nunits = TYPE_VECTOR_SUBPARTS (type).to_constant ();
+     /* Build the selector: element I selects index I (i.e. @0 unchanged),
+        except for the inserted element, which selects index
+        NUNITS + RELT, i.e. element RELT of the second input @1.  */
+     vec_perm_builder builder;
+     builder.new_vector (nunits, nunits, 1);
+     for (unsigned i = 0; i < nunits; ++i)
+       builder.quick_push (known_eq (ielt, i) ? nunits + relt : i);
+     vec_perm_indices sel (builder, 2, nunits);
+   }
+   /* For types with a vector mode only emit the permute when
+      can_vec_perm_const_p accepts this constant selector.  */
+   (if (!VECTOR_MODE_P (TYPE_MODE (type))
+        || can_vec_perm_const_p (TYPE_MODE (type), TYPE_MODE (type), sel, false))
+    (vec_perm @0 @1 { vec_perm_indices_to_tree
+                        (build_vector_type (ssizetype, nunits), sel); })))))
+
(if (canonicalize_math_after_vectorization_p ())
(for fmas (FMA)
(simplify
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized -Wno-psabi -w" } */
+
+#define vector __attribute__((__vector_size__(16) ))
+/* Read element 0 of A and write the same value back into element 0.
+   The dg-final directives of this test require that no BIT_INSERT_EXPR
+   and no BIT_FIELD_REF is left in the "optimized" dump.  */
+vector int g(vector int a)
+{
+ int b = a[0];
+ a[0] = b;
+ return a;
+}
+
+/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 0 "optimized" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized -Wno-psabi -w" } */
+
+#define vector __attribute__((__vector_size__(16) ))
+/* Write element 2 of A back into element 2, then store C into element 1.
+   The dg-final directives of this test require exactly one
+   BIT_INSERT_EXPR and no BIT_FIELD_REF or VEC_PERM_EXPR in the
+   "optimized" dump.  */
+vector int g(vector int a, int c)
+{
+ int b = a[2];
+ a[2] = b;
+ a[1] = c;
+ return a;
+}
+
+/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "optimized" } } */
/* { dg-final { scan-assembler-times "vmovsh\[ \\t\]+%xmm\[0-9\]+\[^\n\r\]*%\[er\]\[ad]x+\[^\n\r]*\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovsh\[ \\t\]+\[^\n\r\]*%\[er\]\[ad]x+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovsh\[ \\t\]+\[^\n\r\]*%\[er\]\[ad]x+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovsh\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovsh\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times "vmovsh\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^z\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vmovsh\[ \\t\]+%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-avx" } */
+
+typedef double v2df __attribute__((vector_size(16)));
+/* Return a copy of A whose element 0 is replaced by element 1 of B.
+   The scan-assembler directive of this test looks for an unpckhpd of
+   %xmm0 into %xmm1 (compiled with -O2 -msse2 -mno-avx).  */
+v2df move_sd(v2df a, v2df b)
+{
+ v2df result = a;
+ result[0] = b[1];
+ return result;
+}
+
+/* { dg-final { scan-assembler "unpckhpd\[\\t \]%xmm0, %xmm1" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-avx" } */
+
+typedef double v2df __attribute__((vector_size(16)));
+/* Return a copy of A whose element 1 is replaced by element 1 of B.
+   The scan-assembler directive of this test looks for a shufpd with
+   operands %xmm1, %xmm0 (compiled with -O2 -msse2 -mno-avx).  */
+v2df move_sd(v2df a, v2df b)
+{
+ v2df result = a;
+ result[1] = b[1];
+ return result;
+}
+
+/* { dg-final { scan-assembler "shufpd\[\\t \]*.2, %xmm1, %xmm0" } } */