From 6a6f2cbf9ab366ea38cbe4c8d574486ce2bbe873 Mon Sep 17 00:00:00 2001 From: Iain Sandoe Date: Sat, 3 Dec 2022 20:51:54 +0000 Subject: [PATCH] testsuite, X86, Darwin: Fix bf16 ABI tests for Mach-O/MacOS ABI. These tests have failed since their introduction because they assume that the assembler output is ELF and that the targeted ABI supports the addressing they use. For Darwin's Mach-O object format and ABI we need to make several changes: 1. Use the __USER_LABEL_PREFIX__ (a leading underscore) on symbol names. 2. Remove the use of ELF-specific constructs (.size, .type etc.) 3. We cannot make direct access to common variables in the ABI, so we must move these to BSS. These changes are made in darwin-specific asm files. Signed-off-by: Iain Sandoe gcc/testsuite/ChangeLog: * gcc.target/x86_64/abi/bf16/abi-bf16.exp: Use separate asm for Darwin. * gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp: Likewise. * gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp: Likewise. * gcc.target/x86_64/abi/bf16/args.h: Make xmm_regs, x87_regs extern. * gcc.target/x86_64/abi/bf16/m256bf16/args.h: Likewise. * gcc.target/x86_64/abi/bf16/m512bf16/args.h: Likewise. * gcc.target/x86_64/abi/bf16/asm-support-darwin.S: New file. * gcc.target/x86_64/abi/bf16/m256bf16/asm-support-darwin.S: New file. * gcc.target/x86_64/abi/bf16/m512bf16/asm-support-darwin.S: New file.
--- .../gcc.target/x86_64/abi/bf16/abi-bf16.exp | 12 +- .../gcc.target/x86_64/abi/bf16/args.h | 4 +- .../x86_64/abi/bf16/asm-support-darwin.S | 97 +++++++++++++++ .../x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp | 12 +- .../x86_64/abi/bf16/m256bf16/args.h | 4 +- .../abi/bf16/m256bf16/asm-support-darwin.S | 97 +++++++++++++++ .../x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp | 12 +- .../x86_64/abi/bf16/m512bf16/args.h | 4 +- .../abi/bf16/m512bf16/asm-support-darwin.S | 113 ++++++++++++++++++ 9 files changed, 340 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support-darwin.S create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support-darwin.S create mode 100644 gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support-darwin.S diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/abi-bf16.exp b/gcc/testsuite/gcc.target/x86_64/abi/bf16/abi-bf16.exp index bd386f2a560b..8edab855dd07 100644 --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/abi-bf16.exp +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/abi-bf16.exp @@ -36,9 +36,15 @@ set additional_flags "-W -Wall -msse2" foreach src [lsort [glob -nocomplain $srcdir/$subdir/test_*.c]] { if {[runtest_file_p $runtests $src]} { - c-torture-execute [list $src \ - $srcdir/$subdir/asm-support.S] \ - $additional_flags + if { ([istarget *-*-darwin*]) } then { + c-torture-execute [list $src \ + $srcdir/$subdir/asm-support-darwin.S] \ + $additional_flags + } else { + c-torture-execute [list $src \ + $srcdir/$subdir/asm-support.S] \ + $additional_flags + } } } diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h index 11d7e2b3a1ca..95f9a394f2c3 100644 --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/args.h @@ -42,8 +42,8 @@ typedef union { } X87_T; extern void (*callthis)(void); extern unsigned long long 
rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15; -XMM_T xmm_regs[16]; -X87_T x87_regs[8]; +extern XMM_T xmm_regs[16]; +extern X87_T x87_regs[8]; extern volatile unsigned long long volatile_var; extern void snapshot (void); extern void snapshot_ret (void); diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support-darwin.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support-darwin.S new file mode 100644 index 000000000000..bdaa02fe1877 --- /dev/null +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/asm-support-darwin.S @@ -0,0 +1,97 @@ + .text + .p2align 4,,15 + .globl _snapshot +_snapshot: /* snapshot: store all 16 general-purpose registers and xmm0-xmm15 into the _rax.._r15 and _xmm_regs globals, then jump through the _callthis pointer. */ +LFB3: + movq %rax, _rax(%rip) + movq %rbx, _rbx(%rip) + movq %rcx, _rcx(%rip) + movq %rdx, _rdx(%rip) + movq %rdi, _rdi(%rip) + movq %rsi, _rsi(%rip) + movq %rbp, _rbp(%rip) + movq %rsp, _rsp(%rip) + movq %r8, _r8(%rip) + movq %r9, _r9(%rip) + movq %r10, _r10(%rip) + movq %r11, _r11(%rip) + movq %r12, _r12(%rip) + movq %r13, _r13(%rip) + movq %r14, _r14(%rip) + movq %r15, _r15(%rip) + movdqu %xmm0, _xmm_regs+0(%rip) + movdqu %xmm1, _xmm_regs+16(%rip) + movdqu %xmm2, _xmm_regs+32(%rip) + movdqu %xmm3, _xmm_regs+48(%rip) + movdqu %xmm4, _xmm_regs+64(%rip) + movdqu %xmm5, _xmm_regs+80(%rip) + movdqu %xmm6, _xmm_regs+96(%rip) + movdqu %xmm7, _xmm_regs+112(%rip) + movdqu %xmm8, _xmm_regs+128(%rip) + movdqu %xmm9, _xmm_regs+144(%rip) + movdqu %xmm10, _xmm_regs+160(%rip) + movdqu %xmm11, _xmm_regs+176(%rip) + movdqu %xmm12, _xmm_regs+192(%rip) + movdqu %xmm13, _xmm_regs+208(%rip) + movdqu %xmm14, _xmm_regs+224(%rip) + movdqu %xmm15, _xmm_regs+240(%rip) + jmp *_callthis(%rip) /* jmp, not call: no return address is pushed here */ +LFE3: + + .p2align 4,,15 + .globl _snapshot_ret +_snapshot_ret: /* snapshot_ret: call through _callthis, then record rax, rdx, xmm0, xmm1 and the top two x87 stack entries. */ + movq %rdi, _rdi(%rip) + subq $8, %rsp /* entry has rsp at 8 mod 16 under the SysV ABI; this restores 16-byte alignment for the call */ + call *_callthis(%rip) + addq $8, %rsp + movq %rax, _rax(%rip) + movq %rdx, _rdx(%rip) + movdqu %xmm0, _xmm_regs+0(%rip) + movdqu %xmm1, _xmm_regs+16(%rip) + fstpt _x87_regs(%rip) + fstpt _x87_regs+16(%rip) /* st(0) and st(1) popped into the first two _x87_regs slots */ + fldt _x87_regs+16(%rip) + fldt _x87_regs(%rip) /* reloaded in reverse order, so the x87 stack is left as it was */ + ret + + .globl _callthis +
.zerofill __DATA,__bss,_callthis,8,3 /* zerofill args: segment, section, symbol, size in bytes, log2 alignment; one 8-byte slot per saved value */ + .globl _rax + .zerofill __DATA,__bss,_rax,8,3 + .globl _rbx + .zerofill __DATA,__bss,_rbx,8,3 + .globl _rcx + .zerofill __DATA,__bss,_rcx,8,3 + .globl _rdx + .zerofill __DATA,__bss,_rdx,8,3 + .globl _rsi + .zerofill __DATA,__bss,_rsi,8,3 + .globl _rdi + .zerofill __DATA,__bss,_rdi,8,3 + .globl _rsp + .zerofill __DATA,__bss,_rsp,8,3 + .globl _rbp + .zerofill __DATA,__bss,_rbp,8,3 + .globl _r8 + .zerofill __DATA,__bss,_r8,8,3 + .globl _r9 + .zerofill __DATA,__bss,_r9,8,3 + .globl _r10 + .zerofill __DATA,__bss,_r10,8,3 + .globl _r11 + .zerofill __DATA,__bss,_r11,8,3 + .globl _r12 + .zerofill __DATA,__bss,_r12,8,3 + .globl _r13 + .zerofill __DATA,__bss,_r13,8,3 + .globl _r14 + .zerofill __DATA,__bss,_r14,8,3 + .globl _r15 + .zerofill __DATA,__bss,_r15,8,3 + .globl _xmm_regs + .zerofill __DATA,__bss,_xmm_regs,256,5 /* 16 slots of 16 bytes, the offsets 0..240 stored by snapshot */ + .globl _x87_regs + .zerofill __DATA,__bss,_x87_regs,128,5 /* 8 slots of 16 bytes */ + .globl _volatile_var + .zerofill __DATA,__bss,_volatile_var,8,3 diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp index 309db8ff12e0..02b45052b325 100644 --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/abi-bf16-ymm.exp @@ -36,9 +36,15 @@ set additional_flags "-W -Wall -mavx2" foreach src [lsort [glob -nocomplain $srcdir/$subdir/test_*.c]] { if {[runtest_file_p $runtests $src]} { - c-torture-execute [list $src \ - $srcdir/$subdir/asm-support.S] \ - $additional_flags + if { ([istarget *-*-darwin*]) } then { + c-torture-execute [list $src \ + $srcdir/$subdir/asm-support-darwin.S] \ + $additional_flags + } else { + c-torture-execute [list $src \ + $srcdir/$subdir/asm-support.S] \ + $additional_flags + } } } diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h index 94627ffbd446..1027742cbb2e 100644 ---
a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/args.h @@ -50,8 +50,8 @@ typedef union { } X87_T; extern void (*callthis)(void); extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15; -YMM_T ymm_regs[16]; -X87_T x87_regs[8]; +extern YMM_T ymm_regs[16]; +extern X87_T x87_regs[8]; extern volatile unsigned long long volatile_var; extern void snapshot (void); extern void snapshot_ret (void); diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support-darwin.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support-darwin.S new file mode 100644 index 000000000000..e136b574f6e0 --- /dev/null +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m256bf16/asm-support-darwin.S @@ -0,0 +1,97 @@ + .text + .p2align 4,,15 + .globl _snapshot +_snapshot: /* snapshot: store all 16 general-purpose registers and ymm0-ymm15 into the _rax.._r15 and _ymm_regs globals, then jump through the _callthis pointer. */ +LFB3: /* Mach-O assembler-local labels use the L prefix, not the ELF .L prefix */ + movq %rax, _rax(%rip) + movq %rbx, _rbx(%rip) + movq %rcx, _rcx(%rip) + movq %rdx, _rdx(%rip) + movq %rdi, _rdi(%rip) + movq %rsi, _rsi(%rip) + movq %rbp, _rbp(%rip) + movq %rsp, _rsp(%rip) + movq %r8, _r8(%rip) + movq %r9, _r9(%rip) + movq %r10, _r10(%rip) + movq %r11, _r11(%rip) + movq %r12, _r12(%rip) + movq %r13, _r13(%rip) + movq %r14, _r14(%rip) + movq %r15, _r15(%rip) + vmovdqu %ymm0, _ymm_regs+0(%rip) + vmovdqu %ymm1, _ymm_regs+32(%rip) + vmovdqu %ymm2, _ymm_regs+64(%rip) + vmovdqu %ymm3, _ymm_regs+96(%rip) + vmovdqu %ymm4, _ymm_regs+128(%rip) + vmovdqu %ymm5, _ymm_regs+160(%rip) + vmovdqu %ymm6, _ymm_regs+192(%rip) + vmovdqu %ymm7, _ymm_regs+224(%rip) + vmovdqu %ymm8, _ymm_regs+256(%rip) + vmovdqu %ymm9, _ymm_regs+288(%rip) + vmovdqu %ymm10, _ymm_regs+320(%rip) + vmovdqu %ymm11, _ymm_regs+352(%rip) + vmovdqu %ymm12, _ymm_regs+384(%rip) + vmovdqu %ymm13, _ymm_regs+416(%rip) + vmovdqu %ymm14, _ymm_regs+448(%rip) + vmovdqu %ymm15, _ymm_regs+480(%rip) + jmp *_callthis(%rip) /* jmp, not call: no return address is pushed here */ +LFE3: + + .p2align 4,,15 + .globl _snapshot_ret +_snapshot_ret: /* snapshot_ret: call through _callthis, then record the registers the callee returned in. */ + movq %rdi, _rdi(%rip) + subq $8, %rsp /* entry has rsp at 8 mod 16 under the SysV ABI; this restores 16-byte alignment for the call */ + call
*_callthis(%rip) + addq $8, %rsp + movq %rax, _rax(%rip) + movq %rdx, _rdx(%rip) + vmovdqu %ymm0, _ymm_regs+0(%rip) + vmovdqu %ymm1, _ymm_regs+32(%rip) + fstpt _x87_regs(%rip) + fstpt _x87_regs+16(%rip) /* st(0) and st(1) popped into the first two _x87_regs slots */ + fldt _x87_regs+16(%rip) + fldt _x87_regs(%rip) /* reloaded in reverse order, so the x87 stack is left as it was */ + ret + + .globl _callthis + .zerofill __DATA,__bss,_callthis,8,3 /* zerofill args: segment, section, symbol, size in bytes, log2 alignment; one 8-byte slot per saved value */ + .globl _rax + .zerofill __DATA,__bss,_rax,8,3 + .globl _rbx + .zerofill __DATA,__bss,_rbx,8,3 + .globl _rcx + .zerofill __DATA,__bss,_rcx,8,3 + .globl _rdx + .zerofill __DATA,__bss,_rdx,8,3 + .globl _rsi + .zerofill __DATA,__bss,_rsi,8,3 + .globl _rdi + .zerofill __DATA,__bss,_rdi,8,3 + .globl _rsp + .zerofill __DATA,__bss,_rsp,8,3 + .globl _rbp + .zerofill __DATA,__bss,_rbp,8,3 + .globl _r8 + .zerofill __DATA,__bss,_r8,8,3 + .globl _r9 + .zerofill __DATA,__bss,_r9,8,3 + .globl _r10 + .zerofill __DATA,__bss,_r10,8,3 + .globl _r11 + .zerofill __DATA,__bss,_r11,8,3 + .globl _r12 + .zerofill __DATA,__bss,_r12,8,3 + .globl _r13 + .zerofill __DATA,__bss,_r13,8,3 + .globl _r14 + .zerofill __DATA,__bss,_r14,8,3 + .globl _r15 + .zerofill __DATA,__bss,_r15,8,3 + .globl _ymm_regs + .zerofill __DATA,__bss,_ymm_regs,512,5 /* 16 slots of 32 bytes, the offsets 0..480 stored by snapshot */ + .globl _x87_regs + .zerofill __DATA,__bss,_x87_regs,128,5 /* 8 slots of 16 bytes */ + .globl _volatile_var + .zerofill __DATA,__bss,_volatile_var,8,3 diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp index b6e0fed4cb42..28abb4e876be 100644 --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/abi-bf16-zmm.exp @@ -36,9 +36,15 @@ set additional_flags "-W -Wall -mavx512f" foreach src [lsort [glob -nocomplain $srcdir/$subdir/test_*.c]] { if {[runtest_file_p $runtests $src]} { - c-torture-execute [list $src \ - $srcdir/$subdir/asm-support.S] \ - $additional_flags + if { ([istarget *-*-darwin*]) } then { + c-torture-execute [list $src \ + $srcdir/$subdir/asm-support-darwin.S] \ + $additional_flags + }
else { + c-torture-execute [list $src \ + $srcdir/$subdir/asm-support.S] \ + $additional_flags + } } } diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h index 64b247838336..f9710bae347b 100644 --- a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/args.h @@ -51,8 +51,8 @@ typedef union { } X87_T; extern void (*callthis)(void); extern unsigned long long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15; -ZMM_T zmm_regs[32]; -X87_T x87_regs[8]; +extern ZMM_T zmm_regs[32]; +extern X87_T x87_regs[8]; extern volatile unsigned long long volatile_var; extern void snapshot (void); extern void snapshot_ret (void); diff --git a/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support-darwin.S b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support-darwin.S new file mode 100644 index 000000000000..71b61b36b4f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/x86_64/abi/bf16/m512bf16/asm-support-darwin.S @@ -0,0 +1,113 @@ + .text + .p2align 4,,15 + .globl _snapshot +_snapshot: /* snapshot: store all 16 general-purpose registers and zmm0-zmm31 into the _rax.._r15 and _zmm_regs globals, then jump through the _callthis pointer. */ +LFB3: /* Mach-O assembler-local labels use the L prefix, not the ELF .L prefix */ + movq %rax, _rax(%rip) + movq %rbx, _rbx(%rip) + movq %rcx, _rcx(%rip) + movq %rdx, _rdx(%rip) + movq %rdi, _rdi(%rip) + movq %rsi, _rsi(%rip) + movq %rbp, _rbp(%rip) + movq %rsp, _rsp(%rip) + movq %r8, _r8(%rip) + movq %r9, _r9(%rip) + movq %r10, _r10(%rip) + movq %r11, _r11(%rip) + movq %r12, _r12(%rip) + movq %r13, _r13(%rip) + movq %r14, _r14(%rip) + movq %r15, _r15(%rip) + vmovdqu32 %zmm0, _zmm_regs+0(%rip) + vmovdqu32 %zmm1, _zmm_regs+64(%rip) + vmovdqu32 %zmm2, _zmm_regs+128(%rip) + vmovdqu32 %zmm3, _zmm_regs+192(%rip) + vmovdqu32 %zmm4, _zmm_regs+256(%rip) + vmovdqu32 %zmm5, _zmm_regs+320(%rip) + vmovdqu32 %zmm6, _zmm_regs+384(%rip) + vmovdqu32 %zmm7, _zmm_regs+448(%rip) + vmovdqu32 %zmm8, _zmm_regs+512(%rip) + vmovdqu32 %zmm9, _zmm_regs+576(%rip) + vmovdqu32 %zmm10, _zmm_regs+640(%rip) + vmovdqu32 %zmm11, _zmm_regs+704(%rip)
+ vmovdqu32 %zmm12, _zmm_regs+768(%rip) + vmovdqu32 %zmm13, _zmm_regs+832(%rip) + vmovdqu32 %zmm14, _zmm_regs+896(%rip) + vmovdqu32 %zmm15, _zmm_regs+960(%rip) + vmovdqu32 %zmm16, _zmm_regs+1024(%rip) + vmovdqu32 %zmm17, _zmm_regs+1088(%rip) + vmovdqu32 %zmm18, _zmm_regs+1152(%rip) + vmovdqu32 %zmm19, _zmm_regs+1216(%rip) + vmovdqu32 %zmm20, _zmm_regs+1280(%rip) + vmovdqu32 %zmm21, _zmm_regs+1344(%rip) + vmovdqu32 %zmm22, _zmm_regs+1408(%rip) + vmovdqu32 %zmm23, _zmm_regs+1472(%rip) + vmovdqu32 %zmm24, _zmm_regs+1536(%rip) + vmovdqu32 %zmm25, _zmm_regs+1600(%rip) + vmovdqu32 %zmm26, _zmm_regs+1664(%rip) + vmovdqu32 %zmm27, _zmm_regs+1728(%rip) + vmovdqu32 %zmm28, _zmm_regs+1792(%rip) + vmovdqu32 %zmm29, _zmm_regs+1856(%rip) + vmovdqu32 %zmm30, _zmm_regs+1920(%rip) + vmovdqu32 %zmm31, _zmm_regs+1984(%rip) + jmp *_callthis(%rip) /* jmp, not call: no return address is pushed here */ +LFE3: + + .p2align 4,,15 + .globl _snapshot_ret +_snapshot_ret: /* snapshot_ret: call through _callthis, then record rax, rdx, zmm0, zmm1 and the top two x87 stack entries. */ + movq %rdi, _rdi(%rip) + subq $8, %rsp /* entry has rsp at 8 mod 16 under the SysV ABI; this restores 16-byte alignment for the call */ + call *_callthis(%rip) + addq $8, %rsp + movq %rax, _rax(%rip) + movq %rdx, _rdx(%rip) + vmovdqu32 %zmm0, _zmm_regs+0(%rip) + vmovdqu32 %zmm1, _zmm_regs+64(%rip) + fstpt _x87_regs(%rip) + fstpt _x87_regs+16(%rip) /* st(0) and st(1) popped into the first two _x87_regs slots */ + fldt _x87_regs+16(%rip) + fldt _x87_regs(%rip) /* reloaded in reverse order, so the x87 stack is left as it was */ + ret + + .globl _callthis + .zerofill __DATA,__bss,_callthis,8,3 /* zerofill args: segment, section, symbol, size in bytes, log2 alignment; one 8-byte slot per saved value */ + .globl _rax + .zerofill __DATA,__bss,_rax,8,3 + .globl _rbx + .zerofill __DATA,__bss,_rbx,8,3 + .globl _rcx + .zerofill __DATA,__bss,_rcx,8,3 + .globl _rdx + .zerofill __DATA,__bss,_rdx,8,3 + .globl _rsi + .zerofill __DATA,__bss,_rsi,8,3 + .globl _rdi + .zerofill __DATA,__bss,_rdi,8,3 + .globl _rsp + .zerofill __DATA,__bss,_rsp,8,3 + .globl _rbp + .zerofill __DATA,__bss,_rbp,8,3 + .globl _r8 + .zerofill __DATA,__bss,_r8,8,3 + .globl _r9 + .zerofill __DATA,__bss,_r9,8,3 + .globl _r10 + .zerofill __DATA,__bss,_r10,8,3 + .globl _r11 + .zerofill __DATA,__bss,_r11,8,3 + .globl _r12 + .zerofill __DATA,__bss,_r12,8,3 + .globl _r13 + .zerofill __DATA,__bss,_r13,8,3 + .globl _r14 + .zerofill __DATA,__bss,_r14,8,3 + .globl
_r15 + .zerofill __DATA,__bss,_r15,8,3 + .globl _zmm_regs + .zerofill __DATA,__bss,_zmm_regs,2048,6 /* 2048 bytes is 32 slots of 64 bytes; the last zerofill argument is the log2 alignment */ + .globl _x87_regs + .zerofill __DATA,__bss,_x87_regs,128,5 /* 128 bytes is 8 slots of 16 bytes */ + .globl _volatile_var + .zerofill __DATA,__bss,_volatile_var,8,3 -- 2.47.2