]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/ia64/fpu/e_sqrt.S
ia64: strip trailing whitespace
[thirdparty/glibc.git] / sysdeps / ia64 / fpu / e_sqrt.S
CommitLineData
d5efd131
MF
1.file "sqrt.s"
2
3
4// Copyright (c) 2000 - 2003, Intel Corporation
5// All rights reserved.
0347518d 6//
d5efd131 7// Contributed 2000 by the Intel Numerics Group, Intel Corporation
0347518d 8//
d5efd131
MF
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// * Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// * Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// * The name of Intel Corporation may not be used to endorse or promote
21// products derived from this software without specific prior written
22// permission.
23
0347518d
MF
24// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
d5efd131 26// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
0347518d 27// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
d5efd131 28// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
0347518d
MF
29// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
30// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
d5efd131 32// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
0347518d
MF
33// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35//
d5efd131 36// Intel Corporation is the author of this code, and requests that all
0347518d 37// problem reports or change requests be submitted to it directly at
d5efd131
MF
38// http://www.intel.com/software/products/opensource/libraries/num.htm.
39//
40//********************************************************************
41// History
42//********************************************************************
43// 02/02/00 Initial version
44// 04/04/00 Unwind support added
45// 08/15/00 Bundle added after call to __libm_error_support to properly
46// set [the previously overwritten] GR_Parameter_RESULT.
47// 02/10/03 Reordered header: .section, .global, .proc, .align
48//
49//********************************************************************
50//
51// Function: Combined sqrt(x), where
52// _
53// sqrt(x) = |x, for double precision x values
54//
55//********************************************************************
56//
57// Accuracy: Correctly Rounded
58//
59//********************************************************************
60//
61// Resources Used:
62//
63// Floating-Point Registers: f8 (Input and Return Value)
64// f7 -f14
65//
66// General Purpose Registers:
67// r32-r36 (Locals)
68// r37-r40 (Used to pass arguments to error handling routine)
69//
70// Predicate Registers: p6, p7, p8
71//
72//*********************************************************************
73//
74// IEEE Special Conditions:
75//
76// All faults and exceptions should be raised correctly.
77// sqrt(QNaN) = QNaN
78// sqrt(SNaN) = QNaN
79// sqrt(+/-0) = +/-0
80// sqrt(negative) = QNaN and error handling is called
81//
82//*********************************************************************
83//
84// Implementation:
85//
86// Modified Newton-Raphson Algorithm
87//
88//*********************************************************************
89
90GR_SAVE_PFS = r33
91GR_SAVE_B0 = r34
92GR_SAVE_GP = r35
93
94GR_Parameter_X = r37
95GR_Parameter_Y = r38
96GR_Parameter_RESULT = r39
97
98
99.section .text
100GLOBAL_IEEE754_ENTRY(sqrt)
101{ .mfi
102 alloc r32= ar.pfs,0,5,4,0
103 frsqrta.s0 f7,p6=f8
104 nop.i 0
105} { .mlx
106 // BEGIN DOUBLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
107 nop.m 0
108 // exponent of +1/2 in r2
109 movl r2 = 0x0fffe;;
110} { .mmi
111 // +1/2 in f9
112 setf.exp f9 = r2
113 nop.m 0
114 nop.i 0
115} { .mlx
116 nop.m 0
117 // 3/2 in r3
118 movl r3=0x3fc00000;;
119} { .mfi
120 setf.s f10=r3
121 // Step (1)
122 // y0 = 1/sqrt(a) in f7
0347518d 123 fclass.m.unc p7,p8 = f8,0x3A
d5efd131
MF
124 nop.i 0;;
125} { .mlx
126 nop.m 0
127 // 5/2 in r2
128 movl r2 = 0x40200000
129} { .mlx
130 nop.m 0
131 // 63/8 in r3
132 movl r3 = 0x40fc0000;;
133} { .mfi
134 setf.s f11=r2
135 // Step (2)
136 // h = +1/2 * y0 in f6
137 (p6) fma.s1 f6=f9,f7,f0
138 nop.i 0
139} { .mfi
140 setf.s f12=r3
141 // Step (3)
142 // g = a * y0 in f7
143 (p6) fma.s1 f7=f8,f7,f0
144 nop.i 0
145} { .mfi
146 nop.m 0
147 mov f15 = f8
148 nop.i 0;;
149} { .mlx
150 nop.m 0
151 // 231/16 in r2
152 movl r2 = 0x41670000;;
153} { .mfi
154 setf.s f13=r2
155 // Step (4)
156 // e = 1/2 - g * h in f9
157 (p6) fnma.s1 f9=f7,f6,f9
158 nop.i 0
159} { .mlx
160 nop.m 0
161 // 35/8 in r3
162 movl r3 = 0x408c0000;;
163} { .mfi
164 setf.s f14=r3
165 // Step (5)
166 // S = 3/2 + 5/2 * e in f10
167 (p6) fma.s1 f10=f11,f9,f10
168 nop.i 0
169} { .mfi
170 nop.m 0
171 // Step (6)
172 // e2 = e * e in f11
173 (p6) fma.s1 f11=f9,f9,f0
174 nop.i 0;;
175} { .mfi
176 nop.m 0
177 // Step (7)
178 // t = 63/8 + 231/16 * e in f12
179 (p6) fma.s1 f12=f13,f9,f12
180 nop.i 0;;
181} { .mfi
182 nop.m 0
183 // Step (8)
184 // S1 = e + e2 * S in f10
185 (p6) fma.s1 f10=f11,f10,f9
186 nop.i 0
187} { .mfi
188 nop.m 0
189 // Step (9)
190 // e4 = e2 * e2 in f11
191 (p6) fma.s1 f11=f11,f11,f0
192 nop.i 0;;
193} { .mfi
194 nop.m 0
195 // Step (10)
196 // t1 = 35/8 + e * t in f9
197 (p6) fma.s1 f9=f9,f12,f14
198 nop.i 0;;
199} { .mfi
200 nop.m 0
201 // Step (11)
202 // G = g + S1 * g in f12
203 (p6) fma.s1 f12=f10,f7,f7
204 nop.i 0
205} { .mfi
206 nop.m 0
207 // Step (12)
208 // E = g * e4 in f7
209 (p6) fma.s1 f7=f7,f11,f0
210 nop.i 0;;
211} { .mfi
212 nop.m 0
213 // Step (13)
214 // u = S1 + e4 * t1 in f10
215 (p6) fma.s1 f10=f11,f9,f10
216 nop.i 0;;
217} { .mfi
218 nop.m 0
219 // Step (14)
220 // g1 = G + t1 * E in f7
221 (p6) fma.d.s1 f7=f9,f7,f12
222 nop.i 0;;
223} { .mfi
224 nop.m 0
225 // Step (15)
226 // h1 = h + u * h in f6
227 (p6) fma.s1 f6=f10,f6,f6
228 nop.i 0;;
229} { .mfi
230 nop.m 0
231 // Step (16)
232 // d = a - g1 * g1 in f9
233 (p6) fnma.s1 f9=f7,f7,f8
234 nop.i 0;;
235} { .mfb
236 nop.m 0
237 // Step (17)
238 // g2 = g1 + d * h1 in f7
239 (p6) fma.d.s0 f8=f9,f6,f7
240 (p6) br.ret.sptk b0 ;;
0347518d 241}
d5efd131
MF
242
243{ .mfb
244 nop.m 0
245 mov f8 = f7
246 (p8) br.ret.sptk b0 ;;
247}
248{ .mfb
249 (p7) mov r40 = 49
250 nop.f 0
251 (p7) br.cond.sptk __libm_error_region ;;
252}
253// END DOUBLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
254GLOBAL_IEEE754_END(sqrt)
255
256// Stack operations when calling error support.
257// (1) (2) (3) (call) (4)
258// sp -> + psp -> + psp -> + sp -> +
259// | | | |
260// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
261// | | | |
262// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
263// | | | |
264// | | <- GR_X X1 ->| |
265// | | | |
266// sp-64 -> + sp -> + sp -> + +
267// save ar.pfs save b0 restore gp
268// save gp restore ar.pfs
269
270
271LOCAL_LIBM_ENTRY(__libm_error_region)
272
273//
274// This branch includes all those special values that are not negative,
275// with the result equal to frcpa(x)
0347518d 276//
d5efd131
MF
277
278.prologue
279// We are distinguishing between over(under)flow and letting
280// __libm_error_support set ERANGE or do anything else needed.
281
282// (1)
283{ .mfi
284 add GR_Parameter_Y=-32,sp // Parameter 2 value
285 nop.f 0
286.save ar.pfs,GR_SAVE_PFS
287 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
288}
289{ .mfi
290.fframe 64
291 add sp=-64,sp // Create new stack
292 nop.f 0
293 mov GR_SAVE_GP=gp // Save gp
294};;
295
296
297// (2)
298{ .mmi
299 stfd [GR_Parameter_Y] = f0,16 // STORE Parameter 2 on stack
300 add GR_Parameter_X = 16,sp // Parameter 1 address
301.save b0, GR_SAVE_B0
302 mov GR_SAVE_B0=b0 // Save b0
303};;
304
305.body
306// (3)
307{ .mib
308 stfd [GR_Parameter_X] = f15 // STORE Parameter 1 on stack
309 add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
0347518d 310 nop.b 0
d5efd131
MF
311}
312{ .mib
313 stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
314 add GR_Parameter_Y = -16,GR_Parameter_Y
315 br.call.sptk b0=__libm_error_support# // Call error handling function
316};;
317{ .mmi
318 nop.m 0
319 nop.m 0
320 add GR_Parameter_RESULT = 48,sp
321};;
322
323// (4)
324{ .mmi
325 ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
326.restore sp
327 add sp = 64,sp // Restore stack pointer
328 mov b0 = GR_SAVE_B0 // Restore return address
329};;
330{ .mib
331 mov gp = GR_SAVE_GP // Restore gp
332 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
333 br.ret.sptk b0 // Return
334};;
335
336LOCAL_LIBM_END(__libm_error_region)
337
338
339
340
341.type __libm_error_support#,@function
342.global __libm_error_support#