[thirdparty/glibc.git] / sysdeps / x86_64 / fpu / svml_d_sincos2_core.S

/* Function sincos vectorized with SSE2.
   Copyright (C) 2014-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_d_wrapper_impl.h"

	.text
/* Exported entry point _ZGVbN2vl8l8_sincos.  Its body is generated by
   WRAPPER_IMPL_SSE2_fFF instantiated with sincos as the callee.  That
   macro is not defined in this file (presumably it comes from
   svml_d_wrapper_impl.h -- confirm).  The symbol name appears to follow
   the x86-64 vector function ABI mangling (b = SSE class, N = unmasked,
   2 = two lanes, v = vector argument, l8 = linear pointer with 8-byte
   stride) -- NOTE(review): check against the ABI document.  */
ENTRY (_ZGVbN2vl8l8_sincos)
WRAPPER_IMPL_SSE2_fFF sincos
END (_ZGVbN2vl8l8_sincos)
libmvec_hidden_def (_ZGVbN2vl8l8_sincos)

/* SSE2 ISA version as wrapper to scalar (for vector
   function declared with #pragma omp declare simd notinbranch).

   Calls the scalar \callee once per lane and scatters the results
   through the per-lane destination pointers.

   In:   %xmm0 = two input doubles (lane 0 in the low half).
         %xmm1 = two destination pointers for the value \callee writes
                 through its first pointer argument (%rdi).
         %xmm2 = two destination pointers for the value \callee writes
                 through its second pointer argument (%rsi).
         When instantiated with sincos these are the sine and cosine
         destinations respectively.
   Out:  nothing in registers; four doubles are stored through the
         pointers taken from %xmm1 and %xmm2.

   Only SSE2 instructions may be used here: this is the baseline
   variant, so it must not depend on SSE4.1 (for example pextrd).

   LP64 frame (88 bytes; with the return address this keeps %rsp
   16-byte aligned at both calls, as the movaps/movdqa spills need):
      0(%rsp)  first-output result, lane 0
      8(%rsp)  first-output result, lane 1
     16(%rsp)  second-output result, lane 0
     24(%rsp)  second-output result, lane 1
     32(%rsp)  pointers from %xmm1 (lane 0 at 32, lane 1 at 40)
     48(%rsp)  pointers from %xmm2 (lane 0 at 48, lane 1 at 56)
     64(%rsp)  input doubles (lane 0 at 64, lane 1 at 72)

   ILP32 frame (two pushes plus 88 bytes, pointers are 32 bits wide):
      0(%esp)  input doubles (lane 0 at 0, lane 1 at 8)
     16(%esp)  pointers from %xmm2 (lane 0 at 16, lane 1 at 20)
     32(%esp)  pointers from %xmm1 (lane 0 at 32, lane 1 at 36)
     48(%esp)  first-output results (lane 0 at 48, lane 1 at 56)
     64(%esp)  second-output results (lane 0 at 64, lane 1 at 72)  */
.macro WRAPPER_IMPL_SSE2_fFF_vvv callee
#ifndef __ILP32__
        subq      $88, %rsp
        cfi_adjust_cfa_offset(88)
        /* Spill the inputs and both pointer vectors; they must survive
           the calls and all vector registers are caller-saved.  */
        movaps    %xmm0, 64(%rsp)
        lea       (%rsp), %rdi
        movdqa    %xmm1, 32(%rdi)
        lea       16(%rsp), %rsi
        movdqa    %xmm2, 32(%rsi)
        /* Lane 0: %xmm0 still holds the first input in its low half;
           results go to 0(%rsp) and 16(%rsp).  */
        call      JUMPTARGET(\callee)
        /* Lane 1: reload the second input; results go to 8(%rsp)
           and 24(%rsp).  */
        movsd     72(%rsp), %xmm0
        lea       8(%rsp), %rdi
        lea       24(%rsp), %rsi
        call      JUMPTARGET(\callee)
        /* Fetch the four destination pointers ...  */
        movq      32(%rsp), %rdx
        movq      48(%rsp), %rsi
        movq      40(%rsp), %r8
        movq      56(%rsp), %r10
        /* ... and the four results, then scatter them.  */
        movq      (%rsp), %rax
        movq      16(%rsp), %rcx
        movq      8(%rsp), %rdi
        movq      24(%rsp), %r9
        movq      %rax, (%rdx)
        movq      %rcx, (%rsi)
        movq      %rdi, (%r8)
        movq      %r9, (%r10)
        addq      $88, %rsp
        cfi_adjust_cfa_offset(-88)
        ret
#else
        /* %rbp and %rbx are callee-saved, so they can carry the result
           buffer addresses across the first call.  */
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        pushq   %rbx
        .cfi_def_cfa_offset 24
        .cfi_offset 3, -24
        subl    $88, %esp
        .cfi_def_cfa_offset 112
        leal    64(%rsp), %esi
        movaps  %xmm1, 32(%esp)
        leal    48(%rsp), %edi
        movaps  %xmm2, 16(%esp)
        movq    %rsi, %rbp
        movq    %rdi, %rbx
        movaps  %xmm0, (%esp)
        /* Lane 0: results go to 48(%esp) and 64(%esp).  */
        call    JUMPTARGET(\callee)
        /* Lane 1: only the second input double is needed, so load just
           that element (same as the LP64 path); results go to
           56(%esp) and 72(%esp).  */
        movsd   8(%esp), %xmm0
        leal    8(%rbp), %esi
        leal    8(%rbx), %edi
        call    JUMPTARGET(\callee)
        /* Scatter the results.  Each 32-bit destination pointer is read
           straight from its spill slot with movl.  Extracting the upper
           lane with pextrd would need SSE4.1, which this SSE2 wrapper
           must not require.  Store order: %xmm1 lane 0, %xmm1 lane 1,
           %xmm2 lane 0, %xmm2 lane 1.  */
        movl    32(%esp), %eax
        movsd   48(%esp), %xmm0
        movsd   %xmm0, (%eax)
        movl    36(%esp), %eax
        movsd   56(%esp), %xmm0
        movsd   %xmm0, (%eax)
        movl    16(%esp), %eax
        movsd   64(%esp), %xmm0
        movsd   %xmm0, (%eax)
        movl    20(%esp), %eax
        movsd   72(%esp), %xmm0
        movsd   %xmm0, (%eax)
        addl    $88, %esp
        .cfi_def_cfa_offset 24
        popq    %rbx
        .cfi_def_cfa_offset 16
        popq    %rbp
        .cfi_def_cfa_offset 8
        ret
#endif
.endm

/* Exported entry point _ZGVbN2vvv_sincos.  Its body is the
   WRAPPER_IMPL_SSE2_fFF_vvv macro defined earlier in this file,
   instantiated with the scalar sincos as the callee: the input doubles
   arrive in %xmm0 and the two vectors of destination pointers in %xmm1
   and %xmm2.  */
ENTRY (_ZGVbN2vvv_sincos)
WRAPPER_IMPL_SSE2_fFF_vvv sincos
END (_ZGVbN2vvv_sincos)

/* The hidden definition is emitted only when USE_MULTIARCH is not
   defined.  NOTE(review): presumably a multiarch build provides it
   from the ifunc selector source instead -- confirm.  */
#ifndef USE_MULTIARCH
 libmvec_hidden_def (_ZGVbN2vvv_sincos)
#endif
Commit	Line	Data
c9a8c526	1	/* Function sincos vectorized with SSE2.
bfff8b1b	2	Copyright (C) 2014-2017 Free Software Foundation, Inc.
c9a8c526 AS	3	This file is part of the GNU C Library.
	4
	5	The GNU C Library is free software; you can redistribute it and/or
	6	modify it under the terms of the GNU Lesser General Public
	7	License as published by the Free Software Foundation; either
	8	version 2.1 of the License, or (at your option) any later version.
	9
	10	The GNU C Library is distributed in the hope that it will be useful,
	11	but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	13	Lesser General Public License for more details.
	14
	15	You should have received a copy of the GNU Lesser General Public
	16	License along with the GNU C Library; if not, see
	17	<http://www.gnu.org/licenses/>. */
	18
	19	#include <sysdep.h>
	20	#include "svml_d_wrapper_impl.h"
	21
	22	.text
ee2196bb	23	ENTRY (_ZGVbN2vl8l8_sincos)
c9a8c526	24	WRAPPER_IMPL_SSE2_fFF sincos
ee2196bb AS	25	END (_ZGVbN2vl8l8_sincos)
	26	libmvec_hidden_def (_ZGVbN2vl8l8_sincos)
	27
	28	/* SSE2 ISA version as wrapper to scalar (for vector
	29	function declared with #pragma omp declare simd notinbranch). */
	30	.macro WRAPPER_IMPL_SSE2_fFF_vvv callee
	31	#ifndef __ILP32__
	32	subq $88, %rsp
	33	cfi_adjust_cfa_offset(88)
	34	movaps %xmm0, 64(%rsp)
	35	lea (%rsp), %rdi
	36	movdqa %xmm1, 32(%rdi)
	37	lea 16(%rsp), %rsi
	38	movdqa %xmm2, 32(%rsi)
	39	call JUMPTARGET(\callee)
	40	movsd 72(%rsp), %xmm0
	41	lea 8(%rsp), %rdi
	42	lea 24(%rsp), %rsi
	43	call JUMPTARGET(\callee)
	44	movq 32(%rsp), %rdx
	45	movq 48(%rsp), %rsi
	46	movq 40(%rsp), %r8
	47	movq 56(%rsp), %r10
	48	movq (%rsp), %rax
	49	movq 16(%rsp), %rcx
	50	movq 8(%rsp), %rdi
	51	movq 24(%rsp), %r9
	52	movq %rax, (%rdx)
	53	movq %rcx, (%rsi)
	54	movq %rdi, (%r8)
	55	movq %r9, (%r10)
	56	addq $88, %rsp
	57	cfi_adjust_cfa_offset(-88)
	58	ret
	59	#else
	60	pushq %rbp
	61	.cfi_def_cfa_offset 16
	62	.cfi_offset 6, -16
	63	pushq %rbx
	64	.cfi_def_cfa_offset 24
	65	.cfi_offset 3, -24
	66	subl $88, %esp
	67	.cfi_def_cfa_offset 112
	68	leal 64(%rsp), %esi
	69	movaps %xmm1, 32(%esp)
	70	leal 48(%rsp), %edi
	71	movaps %xmm2, 16(%esp)
	72	movq %rsi, %rbp
	73	movq %rdi, %rbx
	74	movaps %xmm0, (%esp)
	75	call JUMPTARGET(\callee)
	76	movupd 8(%esp), %xmm0
	77	leal 8(%rbp), %esi
	78	leal 8(%rbx), %edi
	79	call JUMPTARGET(\callee)
	80	movdqa 32(%esp), %xmm1
	81	movsd 48(%esp), %xmm0
	82	movq %xmm1, %rax
	83	movdqa 16(%esp), %xmm2
	84	movsd %xmm0, (%eax)
	85	movsd 56(%esp), %xmm0
	86	pextrd $1, %xmm1, %eax
	87	movsd %xmm0, (%eax)
	88	movsd 64(%esp), %xmm0
89	movq %xmm2, %rax
90	movsd %xmm0, (%eax)
91	movsd 72(%esp), %xmm0
92	pextrd $1, %xmm2, %eax
93	movsd %xmm0, (%eax)
94	addl $88, %esp
95	.cfi_def_cfa_offset 24
96	popq %rbx
97	.cfi_def_cfa_offset 16
98	popq %rbp
99	.cfi_def_cfa_offset 8
100	ret
101	#endif
102	.endm
103
104	ENTRY (_ZGVbN2vvv_sincos)
105	WRAPPER_IMPL_SSE2_fFF_vvv sincos
c9a8c526 AS	106	END (_ZGVbN2vvv_sincos)
	107
	108	#ifndef USE_MULTIARCH
	109	libmvec_hidden_def (_ZGVbN2vvv_sincos)
	110	#endif