From: H.J. Lu Date: Tue, 14 Apr 2026 03:06:31 +0000 (+0800) Subject: x86: Zero ZMM16-31 when zeroing all call used registers X-Git-Tag: basepoints/gcc-17~211 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ccb02ceb86c5b9d4f2139f6c73a6660bda4a5b1f;p=thirdparty%2Fgcc.git x86: Zero ZMM16-31 when zeroing all call used registers When zeroing all call used registers with AVX512F enabled, zero ZMM16-31 explicitly since vzeroall doesn't touch ZMM16-31. Also add a test for zeroing all call used registers with both AVX512F and APX enabled. gcc/ PR target/124876 * config/i386/i386.cc (ix86_zero_call_used_regs): Zero ZMM16-31 if needed. gcc/testsuite/ PR target/124876 * gcc.target/i386/zero-scratch-regs-23.c: Scan vpxord on ZMM16-31. * gcc.target/i386/zero-scratch-regs-33.c: New test. Signed-off-by: H.J. Lu --- diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 9d1a2af7064..96a988048ae 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -4078,6 +4078,17 @@ ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs) { emit_insn (zero_all_vec_insn); all_sse_zeroed = true; + if (TARGET_64BIT && TARGET_AVX512F) + { + rtx zero = CONST0_RTX (V4SFmode); + for (unsigned int regno = XMM16_REG; + regno <= XMM31_REG; + regno++) + { + rtx reg = gen_rtx_REG (V4SFmode, regno); + emit_move_insn (reg, zero); + } + } } /* mm/st registers are shared registers set, we should follow the following diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-23.c b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-23.c index a3285bed8a0..397893faa6c 100644 --- a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-23.c +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-23.c @@ -7,6 +7,22 @@ foo (void) } /* { dg-final { scan-assembler "vzeroall" } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm16, %zmm16, %zmm16" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm17, %zmm17, %zmm17" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm18, %zmm18, %zmm18" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm19, %zmm19, %zmm19" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm20, %zmm20, %zmm20" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm21, %zmm21, %zmm21" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm22, %zmm22, %zmm22" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm23, %zmm23, %zmm23" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm24, %zmm24, %zmm24" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm25, %zmm25, %zmm25" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm26, %zmm26, %zmm26" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm27, %zmm27, %zmm27" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm28, %zmm28, %zmm28" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm29, %zmm29, %zmm29" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm30, %zmm30, %zmm30" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm31, %zmm31, %zmm31" { target { ! ia32 } } } } */ /* { dg-final { scan-assembler-times "fldz" 8 } } */ /* { dg-final { scan-assembler-times "fstp\[ \t\]+%st\\(0\\)" 8 } } */ /* { dg-final { scan-assembler-not "%xmm" } } */ diff --git a/gcc/testsuite/gcc.target/i386/zero-scratch-regs-33.c b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-33.c new file mode 100644 index 00000000000..f40fe2a5377 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/zero-scratch-regs-33.c @@ -0,0 +1,60 @@ +/* { dg-do compile { target { *-*-linux* && { ! ia32 } } } } */ +/* { dg-options "-O2 -fzero-call-used-regs=all -march=corei7 -mavx512f -mapxf" } */ + +void +foo (void) +{ +} + +/* { dg-final { scan-assembler "vzeroall" } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm16, %zmm16, %zmm16" } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm17, %zmm17, %zmm17" } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm18, %zmm18, %zmm18" } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm19, %zmm19, %zmm19" } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm20, %zmm20, %zmm20" } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm21, %zmm21, %zmm21" } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm22, %zmm22, %zmm22" } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm23, %zmm23, %zmm23" } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm24, %zmm24, %zmm24" } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm25, %zmm25, %zmm25" } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm26, %zmm26, %zmm26" } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm27, %zmm27, %zmm27" } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm28, %zmm28, %zmm28" } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm29, %zmm29, %zmm29" } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm30, %zmm30, %zmm30" } } */ +/* { dg-final { scan-assembler "vpxord\[ \t\]+%zmm31, %zmm31, %zmm31" } } */ +/* { dg-final { scan-assembler-times "fldz" 8 } } */ +/* { dg-final { scan-assembler-times "fstp\[ \t\]+%st\\(0\\)" 8 } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%eax, %eax" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%edx, %edx" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%ecx, %ecx" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%esi, %esi" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%edi, %edi" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%r8d, %r8d" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%r9d, %r9d" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%r10d, %r10d" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%r11d, %r11d" } } */ +/* { dg-final { scan-assembler "kxorw\[ \t\]+%k0, %k0, %k0" } } */ +/* { dg-final { scan-assembler "kxorw\[ \t\]+%k1, %k1, %k1" } } */ +/* { dg-final { scan-assembler "kxorw\[ \t\]+%k2, %k2, %k2" } } */ +/* { dg-final { scan-assembler "kxorw\[ \t\]+%k3, %k3, %k3" } } */ +/* { dg-final { scan-assembler "kxorw\[ \t\]+%k4, %k4, %k4" } } */ +/* { dg-final { scan-assembler "kxorw\[ \t\]+%k5, %k5, %k5" } } */ +/* { dg-final { scan-assembler "kxorw\[ \t\]+%k6, %k6, %k6" } } */ +/* { dg-final { scan-assembler "kxorw\[ \t\]+%k7, %k7, %k7" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%r16d, %r16d" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%r17d, %r17d" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%r18d, %r18d" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%r19d, %r19d" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%r20d, %r20d" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%r21d, %r21d" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%r22d, %r22d" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%r23d, %r23d" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%r24d, %r24d" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%r25d, %r25d" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%r26d, %r26d" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%r27d, %r27d" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%r28d, %r28d" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%r29d, %r29d" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%r30d, %r30d" } } */ +/* { dg-final { scan-assembler "xorl\[ \t\]+%r31d, %r31d" } } */