git.ipfire.org Git - thirdparty/gcc.git/commitdiff
x86: Don't use vmovdqu16/vmovdqu8 with non-EVEX registers
author H.J. Lu <hjl.tools@gmail.com>
Fri, 20 Jun 2025 08:07:18 +0000 (16:07 +0800)
committer H.J. Lu <hjl.tools@gmail.com>
Mon, 23 Jun 2025 06:14:22 +0000 (14:14 +0800)
Don't use vmovdqu16/vmovdqu8 with non-EVEX register operands just because
AVX512BW is available.

gcc/

PR target/120728
* config/i386/i386.cc (ix86_get_ssemov): Use vmovdqu16/vmovdqu8
only with EVEX register operands.

gcc/testsuite/

PR target/120728
* gcc.target/i386/avx512bw-vmovdqu16-1.c: Scan vmovdqu for
non-EVEX register operands.
* gcc.target/i386/avx512bw-vmovdqu8-1.c: Likewise.
* gcc.target/i386/avx512fp16-13.c: Likewise.
* gcc.target/i386/pr100865-10b.c: Likewise.
* gcc.target/i386/pr100865-3.c: Likewise.
* gcc.target/i386/pr100865-4b.c: Likewise.
* gcc.target/i386/pr100865-5b.c: Likewise.
* gcc.target/i386/pr90773-15.c: Likewise.
* gcc.target/i386/pr90773-16.c: Likewise.
* gcc.target/i386/pr90773-17.c: Likewise.
* gcc.target/i386/pr95483-5.c: Likewise.
* gcc.target/i386/pr120728.c: New test.

Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
13 files changed:
gcc/config/i386/i386.cc
gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu16-1.c
gcc/testsuite/gcc.target/i386/avx512bw-vmovdqu8-1.c
gcc/testsuite/gcc.target/i386/avx512fp16-13.c
gcc/testsuite/gcc.target/i386/pr100865-10b.c
gcc/testsuite/gcc.target/i386/pr100865-3.c
gcc/testsuite/gcc.target/i386/pr100865-4b.c
gcc/testsuite/gcc.target/i386/pr100865-5b.c
gcc/testsuite/gcc.target/i386/pr120728.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr90773-15.c
gcc/testsuite/gcc.target/i386/pr90773-16.c
gcc/testsuite/gcc.target/i386/pr90773-17.c
gcc/testsuite/gcc.target/i386/pr95483-5.c

index 77853297a2fa549782a6677887b9d3a27b9c8937..fc3105919f453cab48918f379d620e33a649f7fa 100644 (file)
@@ -5703,7 +5703,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
                      : "%vmovaps");
          else
            opcode = (misaligned_p
-                     ? (TARGET_AVX512BW
+                     ? (TARGET_AVX512BW && evex_reg_p
                         ? "vmovdqu16"
                         : "%vmovdqu")
                      : "%vmovdqa");
@@ -5745,7 +5745,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
                      : "%vmovaps");
          else
            opcode = (misaligned_p
-                     ? (TARGET_AVX512BW
+                     ? (TARGET_AVX512BW && evex_reg_p
                         ? "vmovdqu8"
                         : "%vmovdqu")
                      : "%vmovdqa");
@@ -5765,7 +5765,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
                      : "%vmovaps");
          else
            opcode = (misaligned_p
-                     ? (TARGET_AVX512BW
+                     ? (TARGET_AVX512BW && evex_reg_p
                         ? "vmovdqu16"
                         : "%vmovdqu")
                      : "%vmovdqa");
index 8603a1909c792eea004c003f33e4cc8f6224a74d..ee8e5cfc81add675c35a61ec16d7b71ff5248775 100644 (file)
 /* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovdqu16|vinserti128)\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqu|vinserti128)\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovdqu16|vextracti128)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqu|vextracti128)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
index d1e33926c81fdc6a8806751057a0692b2666b44c..4c4cddb17f1cb1cc3b451c9b8609a37a00375479 100644 (file)
@@ -16,9 +16,9 @@
 /* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>
 
index 2416c67de53a4117baa14bbaf959da0c2c4a52ed..92ac197e106624bfdc6c9bb310af7ae1763824d9 100644 (file)
@@ -71,7 +71,7 @@ load256u_ph (void const *p)
   return _mm256_loadu_ph (p);
 }
 
-/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^,\]*,\[^\{\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]*\[^,\]*,\[^\{\n\]*%ymm\[0-9\]" 1 } } */
 
 __m128h
 __attribute__ ((noinline, noclone))
@@ -80,7 +80,7 @@ load128u_ph (void const *p)
   return _mm_loadu_ph (p);
 }
 
-/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^,\]*,\[^\{\n\]*%xmm\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]*\[^,\]*,\[^\{\n\]*%xmm\[0-9\]" 1 } } */
 
 void
 __attribute__ ((noinline, noclone))
@@ -89,7 +89,7 @@ store512u_ph (void *p, __m512h a)
   return _mm512_storeu_ph (p, a);
 }
 
-/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^\{\n\]*%zmm\[0-9\], *\[^,\]*" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]*\[^\{\n\]*%zmm\[0-9\], *\[^,\]*" 1 } } */
 
 void
 __attribute__ ((noinline, noclone))
@@ -98,7 +98,7 @@ store256u_ph (void *p, __m256h a)
   return _mm256_storeu_ph (p, a);
 }
 
-/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^\{\n\]*%ymm\[0-9\], *\[^,\]*" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]*\[^\{\n\]*%ymm\[0-9\], *\[^,\]*" 1 } } */
 
 void
 __attribute__ ((noinline, noclone))
@@ -107,7 +107,7 @@ storeu_ph (void *p, __m128h a)
   return _mm_storeu_ph (p, a);
 }
 
-/* { dg-final { scan-assembler-times "vmovdqu16\[ \\t\]*\[^\{\n\]*%xmm\[0-9\], *\[^,\]*" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]*\[^\{\n\]*%xmm\[0-9\], *\[^,\]*" 1 } } */
 
 __m512h
 __attribute__ ((noinline, noclone))
index f60d1bfd2c78d0f90afeae76a4c97988d65174d0..17847ac972721f123b2a53fc816ee0635d577a5d 100644 (file)
@@ -4,4 +4,4 @@
 #include "pr100865-10a.c"
 
 /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%ymm\[0-9\]+, " 8 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 8 } } */
index 433fd81cb0db2d2690b43e6e39d7ccfc782e4cf9..caa083c8b8a1825abad064ee04311a99cd31c02d 100644 (file)
@@ -11,6 +11,6 @@ foo (void)
 }
 
 /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
 /* { dg-final { scan-assembler-not "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, %xmm\[0-9\]+" } } */
 /* { dg-final { scan-assembler-not "vmovdqa" } } */
index 6fd703e8049fccb252027f2d884f724d5aa6f0a1..7017de9032d9c51354996d13f6ad0a3a1ade0654 100644 (file)
@@ -5,7 +5,7 @@
 #include "pr100865-4a.c"
 
 /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%ymm\[0-9\]+, " 2 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 2 } } */
 /* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
 /* { dg-final { scan-assembler-not "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" } } */
 /* { dg-final { scan-assembler-not "vmovdqa" } } */
index 6c2b33d6c69df87d8cac58417d5e5dabf4bd44d1..8b8ed9356df4c41cb9c746edf9d9681d6315b8a5 100644 (file)
@@ -5,6 +5,6 @@
 #include "pr100865-5a.c"
 
 /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu16\[\\t \]%ymm\[0-9\]+, " 4 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 4 } } */
 /* { dg-final { scan-assembler-not "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" } } */
 /* { dg-final { scan-assembler-not "vmovdqa" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr120728.c b/gcc/testsuite/gcc.target/i386/pr120728.c
new file mode 100644 (file)
index 0000000..93d2cd0
--- /dev/null
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4" } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+, " 3 } } */
+/* { dg-final { scan-assembler-not "vmovdqu8" } } */
+/* { dg-final { scan-assembler-not "vmovdqu16" } } */
+
+typedef char __v32qi __attribute__ ((__vector_size__ (32)));
+typedef char __v32qi_u __attribute__ ((__vector_size__ (32),
+                                      __aligned__ (1)));
+typedef short __v16hi __attribute__ ((__vector_size__ (32)));
+typedef short __v16hi_u __attribute__ ((__vector_size__ (32),
+                                          __aligned__ (1)));
+typedef _Float16 __v16hf __attribute__ ((__vector_size__ (32)));
+typedef _Float16 __v16hf_u __attribute__ ((__vector_size__ (32),
+                                          __aligned__ (1)));
+
+extern __v32qi_u v1;
+extern __v16hi_u v2;
+extern __v16hf_u v3;
+
+void
+foo (__v32qi x1, __v16hi x2, __v16hf x3)
+{
+  v1 = x1;
+  v2 = x2;
+  v3 = x3;
+}
index 403cdb248a20dfd3a2bed0acedc4b20bca7bcc1c..880f71d1567bde579f9dafcf4b205240675a7cd2 100644 (file)
@@ -10,5 +10,5 @@ foo (int c)
 }
 
 /* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%.*, %xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
 /* { dg-final { scan-assembler-times "movb\[\\t \]+%.*, 16\\(%\[\^,\]+\\)" 1 } } */
index bb0aadbc77e94ac2244c82fe4dccc6d4c0469a7f..77d584018b53f020d320898c38375e432a1ae87f 100644 (file)
@@ -10,5 +10,5 @@ foo (void)
 }
 
 /* { dg-final { scan-assembler-times "(?:vpcmpeqd|vpternlogd)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
 /* { dg-final { scan-assembler-times "movb\[\\t \]+\\\$-1, 16\\(%\[\^,\]+\\)" 1 } } */
index 61b2bfd7485a3f9be4b636cfe575d8cf2d33b396..68ff7e091e594379002a7e119bc9c3447f96edde 100644 (file)
@@ -11,5 +11,5 @@ foo (void)
 }
 
 /* { dg-final { scan-assembler-times "vpbroadcastd" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]+%xmm\[0-9\]+, \\(%\[\^,\]+\\)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovd\[\\t \]+%xmm\[0-9\]+, 16\\(%\[\^,\]+\\)" 1 { xfail *-*-* } } } */
index b52e39dff79f958274803680468f719e7463a1f5..a21ad01b15d22e6c2ea3395c7c8d00c8020b4105 100644 (file)
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512bw -mavx512vl -O2" } */
-/* { dg-final { scan-assembler-times "(?:vmovdqu8|vinserti128)\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "(?:vmovdqu8|vextracti128)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqu|vinserti128)\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "(?:vmovdqu|vextracti128)\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 1 } } */
 
 #include <immintrin.h>