pinsrw is available for both reg and mem operands under sse2.
pextrw requires sse4.1 for mem operands.
This patch changes attr "isa" for the pinsrw mem alternative from sse4_noavx
to noavx, which enables the optimization below.
- movzwl (%rdi), %eax
pxor %xmm1, %xmm1
- pinsrw $0, %eax, %xmm1
+ pinsrw $0, (%rdi), %xmm1
movdqa %xmm1, %xmm0
gcc/ChangeLog:
PR target/105066
* config/i386/sse.md (vec_set<mode>_0): Change attr "isa" of
alternative 4 from sse4_noavx to noavx.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr105066.c: New test.
[(set (attr "isa")
(cond [(eq_attr "alternative" "0,1,2")
(const_string "avx512fp16")
- (eq_attr "alternative" "3")
+ (eq_attr "alternative" "3,4")
(const_string "noavx")
- (eq_attr "alternative" "4,5,6")
+ (eq_attr "alternative" "5,6")
(const_string "sse4_noavx")
(eq_attr "alternative" "7,8,9")
(const_string "avx")
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-sse4.1" } */
+/* { dg-final { scan-assembler-not "movzwl" } } */
+/* { dg-final { scan-assembler {(?n)pinsrw[ \t]+\$0.*\(%} } } */
+
+#include <immintrin.h>
+
+__m128i load16(void *p){
+ return _mm_loadu_si16(p);
+}