git.ipfire.org Git - thirdparty/gcc.git/commitdiff
x86: Use XMM31 for scratch SSE register
author: H.J. Lu <hjl.tools@gmail.com>
Fri, 16 Jul 2021 17:29:46 +0000 (10:29 -0700)
committer: H.J. Lu <hjl.tools@gmail.com>
Tue, 3 Aug 2021 14:11:58 +0000 (07:11 -0700)
In 64-bit mode, use XMM31 as the scratch SSE register, when possible,
to avoid vzeroupper.

gcc/

* config/i386/i386.c (ix86_gen_scratch_sse_rtx): In 64-bit mode,
try XMM31 to avoid vzeroupper.

gcc/testsuite/

* gcc.target/i386/avx-vzeroupper-14.c: Pass -mno-avx512f to
disable XMM31.
* gcc.target/i386/avx-vzeroupper-15.c: Likewise.
* gcc.target/i386/pr82941-1.c: Updated.  Check for vzeroupper.
* gcc.target/i386/pr82942-1.c: Likewise.
* gcc.target/i386/pr82990-1.c: Likewise.
* gcc.target/i386/pr82990-3.c: Likewise.
* gcc.target/i386/pr82990-5.c: Likewise.
* gcc.target/i386/pr100865-4b.c: Likewise.
* gcc.target/i386/pr100865-6b.c: Likewise.
* gcc.target/i386/pr100865-7b.c: Likewise.
* gcc.target/i386/pr100865-10b.c: Likewise.
* gcc.target/i386/pr100865-8b.c: Updated.
* gcc.target/i386/pr100865-9b.c: Likewise.
* gcc.target/i386/pr100865-11b.c: Likewise.
* gcc.target/i386/pr100865-12b.c: Likewise.

16 files changed:
gcc/config/i386/i386.c
gcc/testsuite/gcc.target/i386/avx-vzeroupper-14.c
gcc/testsuite/gcc.target/i386/avx-vzeroupper-15.c
gcc/testsuite/gcc.target/i386/pr100865-10b.c
gcc/testsuite/gcc.target/i386/pr100865-11b.c
gcc/testsuite/gcc.target/i386/pr100865-12b.c
gcc/testsuite/gcc.target/i386/pr100865-4b.c
gcc/testsuite/gcc.target/i386/pr100865-6b.c
gcc/testsuite/gcc.target/i386/pr100865-7b.c
gcc/testsuite/gcc.target/i386/pr100865-8b.c
gcc/testsuite/gcc.target/i386/pr100865-9b.c
gcc/testsuite/gcc.target/i386/pr82941-1.c
gcc/testsuite/gcc.target/i386/pr82942-1.c
gcc/testsuite/gcc.target/i386/pr82990-1.c
gcc/testsuite/gcc.target/i386/pr82990-3.c
gcc/testsuite/gcc.target/i386/pr82990-5.c

index 842eb0e6786b45c6e38e203f3f8fa74e36fca0e1..ec0690876b71d65a6652ee84e7a3a06de367dbd5 100644 (file)
@@ -23335,9 +23335,21 @@ rtx
 ix86_gen_scratch_sse_rtx (machine_mode mode)
 {
   if (TARGET_SSE && !lra_in_progress)
-    return gen_rtx_REG (mode, (TARGET_64BIT
-                              ? LAST_REX_SSE_REG
-                              : LAST_SSE_REG));
+    {
+      unsigned int regno;
+      if (TARGET_64BIT)
+       {
+         /* In 64-bit mode, use XMM31 to avoid vzeroupper and always
+            use XMM31 for CSE.  */
+         if (ix86_hard_regno_mode_ok (LAST_EXT_REX_SSE_REG, mode))
+           regno = LAST_EXT_REX_SSE_REG;
+         else
+           regno = LAST_REX_SSE_REG;
+       }
+      else
+       regno = LAST_SSE_REG;
+      return gen_rtx_REG (mode, regno);
+    }
   else
     return gen_reg_rtx (mode);
 }
index a31b4a2a63aa239acfca61193caa1b825d2b4d3b..9590f25da225472657f32e1f808bdd16c1b5d406 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mavx -mtune=generic -dp" } */
+/* { dg-options "-O2 -mavx -mno-avx512f -mtune=generic -dp" } */
 
 #include <immintrin.h>
 
index 803936eef01e956edaf7dc7272cef32dd26def94..36dcf7367f1dbd5ba942087ddaaf716ae9fad07b 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mavx -mtune=generic -dp" } */
+/* { dg-options "-O2 -mavx -mno-avx512f -mtune=generic -dp" } */
 
 #include <immintrin.h>
 
index e5616d8d258791514788239663b78c7f93078c25..77ace86ffe854088483004d724153332edf02daf 100644 (file)
@@ -5,3 +5,4 @@
 
 /* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%ymm\[0-9\]+, " 8 } } */
+/* { dg-final { scan-assembler-not "vzeroupper" } } */
index 12d55b9a64242a8a033c52ef4ae1416ada50837c..7e458e85cdd4a4275de0264c7efa8ff42f044a32 100644 (file)
@@ -5,4 +5,4 @@
 
 /* { dg-final { scan-assembler-times "movabsq" 1 } } */
 /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa\[\\t \]%xmm\[0-9\]+, " 16 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
index 63a5629b90c172f68733b7809841a48fd3c5bbec..dee0cfb016a1b9c4394593df9d2ea8dfd49eeca3 100644 (file)
@@ -5,4 +5,4 @@
 
 /* { dg-final { scan-assembler-times "movabsq" 1 } } */
 /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa\[\\t \]%xmm\[0-9\]+, " 16 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
index 8e8a7eaaaff3531fcb98b7800b8595f6c5e38e9c..80e9fdb12ea9734bd4a008173759a8b559bed97b 100644 (file)
@@ -5,5 +5,7 @@
 
 /* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%ymm\[0-9\]+, " 2 } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-not "vpbroadcastb\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" } } */
 /* { dg-final { scan-assembler-not "vmovdqa" } } */
index 44e74c64e5595014c7def40ad3fff502080686a8..35f2e961d259ac10147ad861eb83aae9f70d8821 100644 (file)
@@ -4,6 +4,9 @@
 #include "pr100865-6a.c"
 
 /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 8 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 8 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovdqu32\[\\t \]%ymm\[0-9\]+, " 8 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-not "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" } } */
 /* { dg-final { scan-assembler-not "vmovdqa" } } */
index 0a68820aa3278ab61c8b1352807143ebfb3cfca9..ad267c43891ee41bde46e0be30b219967afb11fd 100644 (file)
@@ -5,5 +5,8 @@
 
 /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%r\[^\n\]*, %ymm\[0-9\]+" 1 { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 16 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 16 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovdqu64\[\\t \]%ymm\[0-9\]+, " 16 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-not "vmovdqa" } } */
index 99a10ad83bd085f1bc64e786cd7127770025bed7..4b7dd7cee3e3c91e30f5b02b3f81f902efb9e674 100644 (file)
@@ -4,4 +4,4 @@
 #include "pr100865-8a.c"
 
 /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa\[\\t \]%xmm\[0-9\]+, " 16 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
index 146962485258e408a6733fc8a29424126164cee6..a315dde7c5201dbed98daf6f289d0c296bcb9c65 100644 (file)
@@ -4,4 +4,4 @@
 #include "pr100865-9a.c"
 
 /* { dg-final { scan-assembler-times "vpbroadcastw\[\\t \]+%(?:r|e)\[^\n\]*, %xmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa\[\\t \]%xmm\[0-9\]+, " 16 } } */
+/* { dg-final { scan-assembler-times "vmovdqa64\[\\t \]%xmm\[0-9\]+, " 16 } } */
index d7e530d511613d384fa9715e9d75074ec9316eb1..c3be2f5b7977b9fadae5cfb703421a633e41e19f 100644 (file)
@@ -11,4 +11,5 @@ pr82941 ()
   z = y;
 }
 
-/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
index 9cdf81a9d6033ff3533b0cc2ea1febbab4cb8e66..29ead049a67fb0b53569e1a94b62b7d4fd6e3ab6 100644 (file)
@@ -3,4 +3,5 @@
 
 #include "pr82941-1.c"
 
-/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
index ff1d6d40eb2688c431bd234eff554d92d5c25f0f..bbf580fea77a7e8c296f98fb935dbb14b3c698d8 100644 (file)
@@ -11,4 +11,5 @@ pr82941 ()
   z = y;
 }
 
-/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
index 201fa98d8d413ff23bffecb0deed9073ec2af216..89ddb20adb32cfb996aac646914c45b92389a3a2 100644 (file)
@@ -3,4 +3,5 @@
 
 #include "pr82941-1.c"
 
-/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
index 9932bdc537568e9724e855db308bab071c0f03c9..b9da0e706b1cf28af9fba763e69335ebad6df968 100644 (file)
@@ -11,4 +11,5 @@ pr82941 ()
   z = y;
 }
 
-/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */