]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/ia64/fpu/e_sqrtl.S
Remove "Contributed by" lines
[thirdparty/glibc.git] / sysdeps / ia64 / fpu / e_sqrtl.S
CommitLineData
d5efd131
MF
1.file "sqrtl.s"
2
3
4// Copyright (c) 2000 - 2003, Intel Corporation
5// All rights reserved.
6//
d5efd131
MF
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
11//
12// * Redistributions of source code must retain the above copyright
13// notice, this list of conditions and the following disclaimer.
14//
15// * Redistributions in binary form must reproduce the above copyright
16// notice, this list of conditions and the following disclaimer in the
17// documentation and/or other materials provided with the distribution.
18//
19// * The name of Intel Corporation may not be used to endorse or promote
20// products derived from this software without specific prior written
21// permission.
22
0347518d
MF
23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
d5efd131 25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
0347518d 26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
d5efd131 27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
0347518d
MF
28// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
d5efd131 31// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
0347518d
MF
32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34//
d5efd131 35// Intel Corporation is the author of this code, and requests that all
0347518d 36// problem reports or change requests be submitted to it directly at
d5efd131
MF
37// http://www.intel.com/software/products/opensource/libraries/num.htm.
38//
39//********************************************************************
40//
41// History:
42// 02/02/00 (hand-optimized)
43// 04/04/00 Unwind support added
44// 08/15/00 Bundle added after call to __libm_error_support to properly
45// set [the previously overwritten] GR_Parameter_RESULT.
46// 05/20/02 Cleaned up namespace and sf0 syntax
47// 02/10/03 Reordered header: .section, .global, .proc, .align
48//
49//********************************************************************
50//
51// Function: Combined sqrtl(x), where
52// _
53// sqrtl(x) = |x, for double-extended precision x values
54//
55//********************************************************************
56//
57// Resources Used:
58//
59// Floating-Point Registers: f8 (Input and Return Value)
60// f7 -f14
61//
62// General Purpose Registers:
63// r32-r36 (Locals)
64// r37-r40 (Used to pass arguments to error handling routine)
65//
66// Predicate Registers: p6, p7, p8
67//
68//********************************************************************
69//
70// IEEE Special Conditions:
71//
72// All faults and exceptions should be raised correctly.
73// sqrtl(QNaN) = QNaN
74// sqrtl(SNaN) = QNaN
75// sqrtl(+/-0) = +/-0
76// sqrtl(negative) = QNaN and error handling is called
77//
78//********************************************************************
79//
80// Implementation:
81//
82// Modified Newton-Raphson Algorithm
83//
84//********************************************************************
85
86GR_SAVE_PFS = r33
87GR_SAVE_B0 = r34
88GR_SAVE_GP = r35
89GR_Parameter_X = r37
90GR_Parameter_Y = r38
91GR_Parameter_RESULT = r39
92GR_Parameter_TAG = r40
93
94FR_X = f15
95FR_Y = f0
96FR_RESULT = f8
97
98.section .text
99GLOBAL_IEEE754_ENTRY(sqrtl)
100{ .mlx
101alloc r32= ar.pfs,0,5,4,0
102 // exponent of +1/2 in r2
103 movl r2 = 0x0fffe;;
104} { .mfi
105 // +1/2 in f10
106 setf.exp f12 = r2
107 // Step (1)
108 // y0 = 1/sqrt(a) in f7
109 frsqrta.s0 f7,p6=f8
110 nop.i 0;;
111} { .mfi
112 nop.m 0
113 // Step (2)
114 // H0 = +1/2 * y0 in f9
115 (p6) fma.s1 f9=f12,f7,f0
116 nop.i 0
117} { .mfi
118 nop.m 0
119 // Step (3)
120 // S0 = a * y0 in f7
121 (p6) fma.s1 f7=f8,f7,f0
122 nop.i 0;;
123} { .mfi
124 nop.m 0
0347518d
MF
125 // Make copy input x
126 mov f13=f8
d5efd131
MF
127 nop.i 0
128} { .mfi
129 nop.m 0
130 fclass.m.unc p7,p8 = f8,0x3A
131 nop.i 0;;
132} { .mfi
133 nop.m 0
134 // Step (4)
135 // d0 = 1/2 - S0 * H0 in f10
136 (p6) fnma.s1 f10=f7,f9,f12
137 nop.i 0;;
0347518d 138}
d5efd131
MF
139{ .mfi
140 nop.m 0
141 mov f15=f8
142 nop.i 0;;
143} { .mfi
144 nop.m 0
145 // Step (5)
146 // H1 = H0 + d0 * H0 in f9
147 (p6) fma.s1 f9=f10,f9,f9
148 nop.i 0
149} { .mfi
150 nop.m 0
151 // Step (6)
152 // S1 = S0 + d0 * S0 in f7
153 (p6) fma.s1 f7=f10,f7,f7
154 nop.i 0;;
155} { .mfi
156 nop.m 0
157 // Step (7)
158 // d1 = 1/2 - S1 * H1 in f10
159 (p6) fnma.s1 f10=f7,f9,f12
160 nop.i 0;;
161} { .mfi
162 nop.m 0
163 // Step (8)
164 // H2 = H1 + d1 * H1 in f9
165 (p6) fma.s1 f9=f10,f9,f9
166 nop.i 0
167} { .mfi
168 nop.m 0
169 // Step (9)
170 // S2 = S1 + d1 * S1 in f7
171 (p6) fma.s1 f7=f10,f7,f7
172 nop.i 0;;
173} { .mfi
174 nop.m 0
175 // Step (10)
176 // d2 = 1/2 - S2 * H2 in f10
177 (p6) fnma.s1 f10=f7,f9,f12
178 nop.i 0
179} { .mfi
180 nop.m 0
181 // Step (11)
182 // e2 = a - S2 * S2 in f12
183 (p6) fnma.s1 f12=f7,f7,f8
184 nop.i 0;;
185} { .mfi
186 nop.m 0
187 // Step (12)
188 // S3 = S2 + d2 * S2 in f7
189 (p6) fma.s1 f7=f12,f9,f7
190 nop.i 0
191} { .mfi
192 nop.m 0
193 // Step (13)
194 // H3 = H2 + d2 * H2 in f9
195 (p6) fma.s1 f9=f10,f9,f9
196 nop.i 0;;
197} { .mfi
198 nop.m 0
199 // Step (14)
200 // e3 = a - S3 * S3 in f12
201 (p6) fnma.s1 f12=f7,f7,f8
202 nop.i 0;;
203} { .mfb
204 nop.m 0
205 // Step (15)
206 // S = S3 + e3 * H3 in f7
207 (p6) fma.s0 f8=f12,f9,f7
208 (p6) br.ret.sptk b0 ;;
209}
210{ .mfb
211 mov GR_Parameter_TAG = 48
212 mov f8 = f7
213 (p8) br.ret.sptk b0 ;;
214}
215//
216// This branch includes all those special values that are not negative,
217// with the result equal to frcpa(x)
218//
219
220
221// END DOUBLE EXTENDED PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
222GLOBAL_IEEE754_END(sqrtl)
3a327316 223libm_alias_ldouble_other (__sqrt, sqrt)
d5efd131
MF
224
225LOCAL_LIBM_ENTRY(__libm_error_region)
226.prologue
227{ .mfi
228 add GR_Parameter_Y=-32,sp // Parameter 2 value
229 nop.f 0
230.save ar.pfs,GR_SAVE_PFS
231 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
232}
233{ .mfi
234.fframe 64
235 add sp=-64,sp // Create new stack
236 nop.f 0
237 mov GR_SAVE_GP=gp // Save gp
238};;
239{ .mmi
240 stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
241 add GR_Parameter_X = 16,sp // Parameter 1 address
242.save b0, GR_SAVE_B0
243 mov GR_SAVE_B0=b0 // Save b0
244};;
245.body
246{ .mib
247 stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
248 add GR_Parameter_RESULT = 0,GR_Parameter_Y
249 nop.b 0 // Parameter 3 address
250}
251{ .mib
252 stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
253 add GR_Parameter_Y = -16,GR_Parameter_Y
254 br.call.sptk b0=__libm_error_support# // Call error handling function
255};;
256{ .mmi
257 nop.m 0
258 nop.m 0
259 add GR_Parameter_RESULT = 48,sp
260};;
261{ .mmi
262 ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
263.restore sp
264 add sp = 64,sp // Restore stack pointer
265 mov b0 = GR_SAVE_B0 // Restore return address
266};;
267{ .mib
268 mov gp = GR_SAVE_GP // Restore gp
269 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
270 br.ret.sptk b0 // Return
271};;
272
273LOCAL_LIBM_END(__libm_error_region#)
274.type __libm_error_support#,@function
275.global __libm_error_support#