git.ipfire.org Git - thirdparty/gcc.git/commitdiff
x86: Zero ZMM16-31 when zeroing all call used registers
author H.J. Lu <hjl.tools@gmail.com>
Tue, 14 Apr 2026 03:06:31 +0000 (11:06 +0800)
committer H.J. Lu <hjl.tools@gmail.com>
Tue, 14 Apr 2026 05:21:43 +0000 (13:21 +0800)
When zeroing all call used registers with AVX512F enabled, zero ZMM16-31
explicitly since vzeroall doesn't touch ZMM16-31.  Also add a test for
zeroing all call used registers with both AVX512F and APX enabled.

gcc/

PR target/124876
* config/i386/i386.cc (ix86_zero_call_used_regs): Zero ZMM16-31
if needed.

gcc/testsuite/

PR target/124876
* gcc.target/i386/zero-scratch-regs-23.c: Scan vpxord on ZMM16-31.
* gcc.target/i386/zero-scratch-regs-33.c: New test.

Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
gcc/config/i386/i386.cc
gcc/testsuite/gcc.target/i386/zero-scratch-regs-23.c
gcc/testsuite/gcc.target/i386/zero-scratch-regs-33.c [new file with mode: 0644]

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 9d1a2af70647f3f7a778180a72bcf224ea43b46f..96a988048ae55b725e35184705223bab34a96e17 100644 (file)
@@ -4078,6 +4078,17 @@ ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
     {
       emit_insn (zero_all_vec_insn);
       all_sse_zeroed = true;
+      if (TARGET_64BIT && TARGET_AVX512F)
+       {
+         rtx zero = CONST0_RTX (V4SFmode);
+         for (unsigned int regno = XMM16_REG;
+              regno <= XMM31_REG;
+              regno++)
+           {
+             rtx reg = gen_rtx_REG (V4SFmode, regno);
+             emit_move_insn (reg, zero);
+           }
+       }
     }
 
   /* mm/st registers are shared registers set, we should follow the following
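diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-23.c b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-23.c
index a3285bed8a092c4bbe90144a0cd2b01f74bf3eb9..397893faa6ca6843234c697c357ac8ffd759c288 100644 (file)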
@@ -7,6 +7,22 @@ foo (void)
 }
 
 /* { dg-final { scan-assembler "vzeroall" } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm16, %zmm16, %zmm16" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm17, %zmm17, %zmm17" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm18, %zmm18, %zmm18" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm19, %zmm19, %zmm19" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm20, %zmm20, %zmm20" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm21, %zmm21, %zmm21" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm22, %zmm22, %zmm22" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm23, %zmm23, %zmm23" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm24, %zmm24, %zmm24" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm25, %zmm25, %zmm25" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm26, %zmm26, %zmm26" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm27, %zmm27, %zmm27" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm28, %zmm28, %zmm28" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm29, %zmm29, %zmm29" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm30, %zmm30, %zmm30" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm31, %zmm31, %zmm31" { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "fldz" 8 } } */
 /* { dg-final { scan-assembler-times "fstp\[ \t\]+%st\\(0\\)" 8 } } */
 /* { dg-final { scan-assembler-not "%xmm" } } */
diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-33.c b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-33.c
new file mode 100644 (file)
index 0000000..f40fe2a
--- /dev/null
@@ -0,0 +1,60 @@
+/* { dg-do compile { target { *-*-linux* && { ! ia32 } } } } */
+/* { dg-options "-O2 -fzero-call-used-regs=all -march=corei7 -mavx512f -mapxf" } */
+
+void
+foo (void)
+{
+}
+
+/* { dg-final { scan-assembler "vzeroall" } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm16, %zmm16, %zmm16" } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm17, %zmm17, %zmm17" } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm18, %zmm18, %zmm18" } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm19, %zmm19, %zmm19" } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm20, %zmm20, %zmm20" } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm21, %zmm21, %zmm21" } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm22, %zmm22, %zmm22" } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm23, %zmm23, %zmm23" } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm24, %zmm24, %zmm24" } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm25, %zmm25, %zmm25" } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm26, %zmm26, %zmm26" } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm27, %zmm27, %zmm27" } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm28, %zmm28, %zmm28" } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm29, %zmm29, %zmm29" } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm30, %zmm30, %zmm30" } } */
+/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm31, %zmm31, %zmm31" } } */
+/* { dg-final { scan-assembler-times "fldz" 8 } } */
+/* { dg-final { scan-assembler-times "fstp\[ \t\]+%st\\(0\\)" 8 } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%eax, %eax" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%edx, %edx" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%ecx, %ecx" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%esi, %esi" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%edi, %edi" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%r8d, %r8d" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%r9d, %r9d" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%r10d, %r10d" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%r11d, %r11d" } } */
+/* { dg-final { scan-assembler "kxorw\[ \t\]+%k0, %k0, %k0" } } */
+/* { dg-final { scan-assembler "kxorw\[ \t\]+%k1, %k1, %k1" } } */
+/* { dg-final { scan-assembler "kxorw\[ \t\]+%k2, %k2, %k2" } } */
+/* { dg-final { scan-assembler "kxorw\[ \t\]+%k3, %k3, %k3" } } */
+/* { dg-final { scan-assembler "kxorw\[ \t\]+%k4, %k4, %k4" } } */
+/* { dg-final { scan-assembler "kxorw\[ \t\]+%k5, %k5, %k5" } } */
+/* { dg-final { scan-assembler "kxorw\[ \t\]+%k6, %k6, %k6" } } */
+/* { dg-final { scan-assembler "kxorw\[ \t\]+%k7, %k7, %k7" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%r16d, %r16d" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%r17d, %r17d" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%r18d, %r18d" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%r19d, %r19d" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%r20d, %r20d" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%r21d, %r21d" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%r22d, %r22d" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%r23d, %r23d" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%r24d, %r24d" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%r25d, %r25d" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%r26d, %r26d" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%r27d, %r27d" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%r28d, %r28d" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%r29d, %r29d" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%r30d, %r30d" } } */
+/* { dg-final { scan-assembler "xorl\[ \t\]+%r31d, %r31d" } } */