(define_insn "avx2_vbroadcasti128_<mode>"
[(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
(vec_concat:VI_256
- (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
+ (match_operand:<ssehalfvecmode> 1 "memory_operand" "jm,m,m") ; alt 0 switches from m to jm; NOTE(review): jm presumably rejects addresses using APX extended GPRs -- confirm in the i386 constraint definitions
(match_dup 1)))]
"TARGET_AVX2"
"@
vbroadcasti128\t{%1, %0|%0, %1}
vbroadcast<i128vldq>\t{%1, %0|%0, %1}
vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
- [(set_attr "isa" "*,avx512dq,avx512vl")
+ [(set_attr "isa" "noavx512vl,avx512dq,avx512vl") ; alt 0 (the vex-prefixed vbroadcasti128 form) goes from * to noavx512vl; NOTE(review): presumably disables it whenever AVX512VL is on, leaving the evex alternatives -- confirm against the isa attribute
(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "vex,evex,evex")
+ (set_attr "addr" "gpr16,*,*") ; new per-alternative attribute: gpr16 for alt 0, default for the two evex alternatives; NOTE(review): presumably limits alt 0 addresses to the 16 legacy GPRs -- confirm against the addr attribute
(set_attr "mode" "OI")])
;; optimize vlddqu + vinserti128 to vbroadcasti128, the former will use
--- /dev/null
+/* { dg-do assemble { target { apxf && { ! ia32 } } } } */
+/* { dg-options "-mavx512vl -mapxf -O2" } */
+
+#include <stdint.h>
+#include <immintrin.h>
+
+void broadcast_avx2(int *sx, __m256i *coeff, __m128i *temp) { /* sx is not referenced in the body */
+ /* Broadcast eight temp[] entries into semp[], then store four unpack results to coeff[0..3].  */
+ __m256i semp[8];
+ /* Each iteration: empty asm clobbering the listed GPRs, index held in r16, one broadcast.  */
+ /* NOTE(review): presumably this steers the load address onto an APX extended GPR -- confirm.  */
+ for (int i = 0; i < 8; i++)
+ {
+ asm volatile ("" : : : "r8", "r9", "r10", "r11", "r12", "r13",
+ "r14", "r15", "rax", "rcx", "rsi", "rdi", "rdx");
+ register volatile uint64_t sm asm ("%r16") = i; /* local register variable bound to r16 */
+ semp[i] = _mm256_broadcastsi128_si256(temp[sm]); /* sm is the index into temp[] */
+ }
+ /* All eight semp[] elements are consumed below, so every broadcast feeds a stored result.  */
+ coeff[0] = _mm256_unpacklo_epi64(semp[0], semp[1]);
+ coeff[1] = _mm256_unpackhi_epi64(semp[2], semp[3]);
+ coeff[2] = _mm256_unpacklo_epi64(semp[4], semp[5]);
+ coeff[3] = _mm256_unpackhi_epi64(semp[6], semp[7]);
+}