[thirdparty/glibc.git] / sysdeps / ia64 / fpu / s_tanf.S

.file "tancotf.s"


// Copyright (c) 2000 - 2005, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.

// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
// 04/04/00 Unwind support added
// 12/27/00 Improved speed
// 02/21/01 Updated to call tanl
// 05/30/02 Improved speed, added cotf.
// 11/25/02 Added explicit completer on fnorm
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 04/17/03 Eliminated redundant stop bits
// 03/31/05 Reformatted delimiters between data tables
//
// APIs
//==============================================================
// float tanf(float)
// float cotf(float)
//
// Algorithm Description for tanf
//==============================================================
// The tanf function computes the principal value of the tangent of x,
// where x is the argument in radians.
//
// There are 5 paths:
// 1. x = +/-0.0
//    Return tanf(x) = +/-0.0
//
// 2. x = [S,Q]NaN
//    Return tanf(x) = QNaN
//
// 3. x = +/-Inf
//    Return tanf(x) = QNaN
//
// 4. x = r + (Pi/2)*N, N = RoundInt(x*(2/Pi)), N is even, |r|<Pi/4
//    Return tanf(x) = P19(r) = A1*r + A3*r^3 + A5*r^5 + ... + A19*r^19 =
//    = r*(A1 + A3*t + A5*t^2 + ... + A19*t^9) = r*P9(t), where t = r^2
//
// 5. x = r + (Pi/2)*N, N = RoundInt(x*(2/Pi)), N is odd, |r|<Pi/4
//    Return tanf(x) = -1/r + P11(r) = -1/r + B1*r + B3*r^3 + ... + B11*r^11 =
//    = -1/r + r*(B1 + B3*t + B5*t^2 + ... + B11*t^5) = -1/r + r*P5(t),
//    where t = r^2
//
// Algorithm Description for cotf
//==============================================================
// The cotf function computes the principal value of the cotangent of x,
// where x is the argument in radians.
//
// There are 5 paths:
// 1. x = +/-0.0
//    Return cotf(x) = +/-Inf and error handling is called
//
// 2. x = [S,Q]NaN
//    Return cotf(x) = QNaN
//
// 3. x = +/-Inf
//    Return cotf(x) = QNaN
//
// 4. x = r + (Pi/2)*N, N = RoundInt(x*(2/Pi)), N is odd, |r|<Pi/4
//    Return cotf(x) = P19(-r) = A1*(-r) + A3*(-r^3) + ... + A19*(-r^19) =
//    = -r*(A1 + A3*t + A5*t^2 + ... + A19*t^9) = -r*P9(t), where t = r^2
//
// 5. x = r + (Pi/2)*N, N = RoundInt(x*(2/Pi)), N is even, |r|<Pi/4
//    Return cotf(x) = 1/r + P11(-r) = 1/r + B1*(-r) + ... + B11*(-r^11) =
//    = 1/r - r*(B1 + B3*t + B5*t^2 + ... + B11*t^5) = 1/r - r*P5(t),
//    where t = r^2
//
//    We set p10 and clear p11 if computing tanf, vice versa for cotf.
//
//
// Registers used
//==============================================================
// Floating Point registers used:
// f8, input
// f32 -> f80
//
// General registers used:
// r14 -> r23, r32 -> r39
//
// Predicate registers used:
// p6 -> p13
//
// Assembly macros
//==============================================================
// integer registers
rExp                        = r14
rSignMask                   = r15
rRshf                       = r16
rScFctrExp                  = r17
rIntN                       = r18
rSigRcpPiby2                = r19
rScRshf                     = r20
rCoeffA                     = r21
rCoeffB                     = r22
rExpCut                     = r23

GR_SAVE_B0                  = r33
GR_SAVE_PFS                 = r34
GR_SAVE_GP                  = r35
GR_Parameter_X              = r36
GR_Parameter_Y              = r37
GR_Parameter_RESULT         = r38
GR_Parameter_Tag            = r39

//==============================================================
// floating point registers
fScRcpPiby2                 = f32
fScRshf                     = f33
fNormArg                    = f34
fScFctr                     = f35
fRshf                       = f36
fShiftedN                   = f37
fN                          = f38
fR                          = f39
fA01                        = f40
fA03                        = f41
fA05                        = f42
fA07                        = f43
fA09                        = f44
fA11                        = f45
fA13                        = f46
fA15                        = f47
fA17                        = f48
fA19                        = f49
fB01                        = f50
fB03                        = f51
fB05                        = f52
fB07                        = f53
fB09                        = f54
fB11                        = f55
fA03_01                     = f56
fA07_05                     = f57
fA11_09                     = f58
fA15_13                     = f59
fA19_17                     = f60
fA11_05                     = f61
fA19_13                     = f62
fA19_05                     = f63
fRbyA03_01                  = f64
fB03_01                     = f65
fB07_05                     = f66
fB11_09                     = f67
fB11_05                     = f68
fRbyB03_01                  = f69
fRbyB11_01                  = f70
fRp2                        = f71
fRp4                        = f72
fRp8                        = f73
fRp5                        = f74
fY0                         = f75
fY1                         = f76
fD                          = f77
fDp2                        = f78
fInvR                       = f79
fPiby2                      = f80
//==============================================================


RODATA
.align 16

LOCAL_OBJECT_START(coeff_A)
data8 0x3FF0000000000000 // A1  = 1.00000000000000000000e+00
data8 0x3FD5555556BCE758 // A3  = 3.33333334641442641606e-01
data8 0x3FC111105C2DAE48 // A5  = 1.33333249100689099175e-01
data8 0x3FABA1F876341060 // A7  = 5.39701122561673229739e-02
data8 0x3F965FB86D12A38D // A9  = 2.18495194027670719750e-02
data8 0x3F8265F62415F9D6 // A11 = 8.98353860497717439465e-03
data8 0x3F69E3AE64CCF58D // A13 = 3.16032468108912746342e-03
data8 0x3F63920D09D0E6F6 // A15 = 2.38897844840557235331e-03
LOCAL_OBJECT_END(coeff_A)

LOCAL_OBJECT_START(coeff_B)
data8 0xC90FDAA22168C235, 0x3FFF // pi/2
data8 0x3FD55555555358DB // B1  = 3.33333333326107426583e-01
data8 0x3F96C16C252F643F // B3  = 2.22222230621336129239e-02
data8 0x3F61566243AB3C60 // B5  = 2.11638633968606896785e-03
data8 0x3F2BC1169BD4438B // B7  = 2.11748132564551094391e-04
data8 0x3EF611B4CEA056A1 // B9  = 2.10467959860990200942e-05
data8 0x3EC600F9E32194BF // B11 = 2.62305891234274186608e-06
data8 0xBF42BA7BCC177616 // A17 =-5.71546981685324877205e-04
data8 0x3F4F2614BC6D3BB8 // A19 = 9.50584530849832782542e-04
LOCAL_OBJECT_END(coeff_B)


.section .text

LOCAL_LIBM_ENTRY(cotf)

{ .mlx
Commit	Line	Data
d5efd131 MF	1	.file "tancotf.s"
	2
	3
	4	// Copyright (c) 2000 - 2005, Intel Corporation
	5	// All rights reserved.
	6	//
	7	// Contributed 2000 by the Intel Numerics Group, Intel Corporation
	8	//
	9	// Redistribution and use in source and binary forms, with or without
	10	// modification, are permitted provided that the following conditions are
	11	// met:
	12	//
	13	// * Redistributions of source code must retain the above copyright
	14	// notice, this list of conditions and the following disclaimer.
	15	//
	16	// * Redistributions in binary form must reproduce the above copyright
	17	// notice, this list of conditions and the following disclaimer in the
	18	// documentation and/or other materials provided with the distribution.
	19	//
	20	// * The name of Intel Corporation may not be used to endorse or promote
	21	// products derived from this software without specific prior written
	22	// permission.
	23
0347518d MF	24	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
0347518d MF	25	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
d5efd131	26	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
0347518d	27	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
d5efd131	28	// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
0347518d MF	29	// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
	30	// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
	31	// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
d5efd131	32	// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
0347518d MF	33	// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
0347518d MF	34	// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
d5efd131 MF	35	//
d5efd131 MF	36	// Intel Corporation is the author of this code, and requests that all
0347518d	37	// problem reports or change requests be submitted to it directly at
d5efd131 MF	38	// http://www.intel.com/software/products/opensource/libraries/num.htm.
	39	//
	40	// History
	41	//==============================================================
	42	// 02/02/00 Initial version
	43	// 04/04/00 Unwind support added
	44	// 12/27/00 Improved speed
	45	// 02/21/01 Updated to call tanl
	46	// 05/30/02 Improved speed, added cotf.
	47	// 11/25/02 Added explicit completer on fnorm
	48	// 02/10/03 Reordered header: .section, .global, .proc, .align
	49	// 04/17/03 Eliminated redundant stop bits
	50	// 03/31/05 Reformatted delimiters between data tables
	51	//
	52	// APIs
	53	//==============================================================
	54	// float tanf(float)
	55	// float cotf(float)
	56	//
	57	// Algorithm Description for tanf
	58	//==============================================================
	59	// The tanf function computes the principle value of the tangent of x,
	60	// where x is radian argument.
	61	//
	62	// There are 5 paths:
	63	// 1. x = +/-0.0
	64	// Return tanf(x) = +/-0.0
	65	//
	66	// 2. x = [S,Q]NaN
	67	// Return tanf(x) = QNaN
	68	//
	69	// 3. x = +/-Inf
	70	// Return tanf(x) = QNaN
	71	//
	72	// 4. x = r + (Pi/2)*N, N = RoundInt(x*(2/Pi)), N is even, |r|<Pi/4
	73	// Return tanf(x) = P19(r) = A1*r + A3*r^3 + A5*r^5 + ... + A19*r^19 =
	74	// = r*(A1 + A3*t + A5*t^2 + ... + A19*t^9) = r*P9(t), where t = r^2
	75	//
	76	// 5. x = r + (Pi/2)*N, N = RoundInt(x*(2/Pi)), N is odd, |r|<Pi/4
	77	// Return tanf(x) = -1/r + P11(r) = -1/r + B1*r + B3*r^3 + ... + B11*r^11 =
	78	// = -1/r + r*(B1 + B3*t + B5*t^2 + ... + B11*t^5) = -1/r + r*P11(t),
	79	// where t = r^2
	80	//
	81	// Algorithm Description for cotf
	82	//==============================================================
	83	// The cotf function computes the principle value of the cotangent of x,
	84	// where x is radian argument.
	85	//
	86	// There are 5 paths:
	87	// 1. x = +/-0.0
	88	// Return cotf(x) = +/-Inf and error handling is called
	89	//
	90	// 2. x = [S,Q]NaN
	91	// Return cotf(x) = QNaN
	92	//
	93	// 3. x = +/-Inf
	94	// Return cotf(x) = QNaN
	95	//
	96	// 4. x = r + (Pi/2)*N, N = RoundInt(x*(2/Pi)), N is odd, |r|<Pi/4
	97	// Return cotf(x) = P19(-r) = A1*(-r) + A3*(-r^3) + ... + A19*(-r^19) =
	98	// = -r*(A1 + A3*t + A5*t^2 + ... + A19*t^9) = -r*P9(t), where t = r^2
	99	//
	100	// 5. x = r + (Pi/2)*N, N = RoundInt(x*(2/Pi)), N is even, |r|<Pi/4
	101	// Return cotf(x) = 1/r + P11(-r) = 1/r + B1*(-r) + ... + B11*(-r^11) =
102	// = 1/r - r*(B1 + B3*t + B5*t^2 + ... + B11*t^5) = 1/r - r*P11(t),
103	// where t = r^2
104	//
105	// We set p10 and clear p11 if computing tanf, vice versa for cotf.
106	//
107	//
108	// Registers used
109	//==============================================================
110	// Floating Point registers used:
111	// f8, input
112	// f32 -> f80
113	//
114	// General registers used:
115	// r14 -> r23, r32 -> r39
116	//
117	// Predicate registers used:
118	// p6 -> p13
119	//
120	// Assembly macros
121	//==============================================================
122	// integer registers
123	rExp = r14
124	rSignMask = r15
125	rRshf = r16
126	rScFctrExp = r17
127	rIntN = r18
128	rSigRcpPiby2 = r19
129	rScRshf = r20
130	rCoeffA = r21
131	rCoeffB = r22
132	rExpCut = r23
133
134	GR_SAVE_B0 = r33
135	GR_SAVE_PFS = r34
136	GR_SAVE_GP = r35
137	GR_Parameter_X = r36
138	GR_Parameter_Y = r37
139	GR_Parameter_RESULT = r38
140	GR_Parameter_Tag = r39
141
142	//==============================================================
143	// floating point registers
144	fScRcpPiby2 = f32
145	fScRshf = f33
146	fNormArg = f34
147	fScFctr = f35
148	fRshf = f36
149	fShiftedN = f37
150	fN = f38
151	fR = f39
152	fA01 = f40
153	fA03 = f41
154	fA05 = f42
155	fA07 = f43
156	fA09 = f44
157	fA11 = f45
158	fA13 = f46
159	fA15 = f47
160	fA17 = f48
161	fA19 = f49
162	fB01 = f50
163	fB03 = f51
164	fB05 = f52
165	fB07 = f53
166	fB09 = f54
167	fB11 = f55
168	fA03_01 = f56
169	fA07_05 = f57
170	fA11_09 = f58
171	fA15_13 = f59
172	fA19_17 = f60
173	fA11_05 = f61
174	fA19_13 = f62
175	fA19_05 = f63
176	fRbyA03_01 = f64
177	fB03_01 = f65
178	fB07_05 = f66
179	fB11_09 = f67
180	fB11_05 = f68
181	fRbyB03_01 = f69
182	fRbyB11_01 = f70
183	fRp2 = f71
184	fRp4 = f72
185	fRp8 = f73
186	fRp5 = f74
187	fY0 = f75
188	fY1 = f76
189	fD = f77
190	fDp2 = f78
191	fInvR = f79
192	fPiby2 = f80
193	//==============================================================
194
195
196	RODATA
197	.align 16
198
199	LOCAL_OBJECT_START(coeff_A)
200	data8 0x3FF0000000000000 // A1 = 1.00000000000000000000e+00
201	data8 0x3FD5555556BCE758 // A3 = 3.33333334641442641606e-01
202	data8 0x3FC111105C2DAE48 // A5 = 1.33333249100689099175e-01
203	data8 0x3FABA1F876341060 // A7 = 5.39701122561673229739e-02
204	data8 0x3F965FB86D12A38D // A9 = 2.18495194027670719750e-02
205	data8 0x3F8265F62415F9D6 // A11 = 8.98353860497717439465e-03
206	data8 0x3F69E3AE64CCF58D // A13 = 3.16032468108912746342e-03
207	data8 0x3F63920D09D0E6F6 // A15 = 2.38897844840557235331e-03
208	LOCAL_OBJECT_END(coeff_A)
209
210	LOCAL_OBJECT_START(coeff_B)
211	data8 0xC90FDAA22168C235, 0x3FFF // pi/2
212	data8 0x3FD55555555358DB // B1 = 3.33333333326107426583e-01
213	data8 0x3F96C16C252F643F // B3 = 2.22222230621336129239e-02
214	data8 0x3F61566243AB3C60 // B5 = 2.11638633968606896785e-03
215	data8 0x3F2BC1169BD4438B // B7 = 2.11748132564551094391e-04
216	data8 0x3EF611B4CEA056A1 // B9 = 2.10467959860990200942e-05
217	data8 0x3EC600F9E32194BF // B11 = 2.62305891234274186608e-06
218	data8 0xBF42BA7BCC177616 // A17 =-5.71546981685324877205e-04
219	data8 0x3F4F2614BC6D3BB8 // A19 = 9.50584530849832782542e-04
220	LOCAL_OBJECT_END(coeff_B)
221
222
223	.section .text
224
225	LOCAL_LIBM_ENTRY(cotf)
226
227	{ .mlx
228