]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/ia64/fpu/e_hypotl.S
(CFLAGS-tst-align.c): Add -mpreferred-stack-boundary=4.
[thirdparty/glibc.git] / sysdeps / ia64 / fpu / e_hypotl.S
1 .file "hypotl.asm"
2
3 // Copyright (C) 2000, 2001, Intel Corporation
4 // All rights reserved.
5 //
6 // Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
7 // Bob Norin, Shane Story, and Ping Tak Peter Tang of the
8 // Computational Software Lab, Intel Corporation.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // * Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // * Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // * The name of Intel Corporation may not be used to endorse or promote
22 // products derived from this software without specific prior written
23 // permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
33 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Intel Corporation is the author of this code, and requests that all
38 // problem reports or change requests be submitted to it directly at
39 // http://developer.intel.com/opensource.
40 //
41 // *********************************************************************
42 //
43 // History:
44 // 2/02/00 hand-optimized
45 // 4/04/00 Unwind support added
46 // 6/20/00 new version
47 // 8/15/00 Bundle added after call to __libm_error_support to properly
48 // set [the previously overwritten] GR_Parameter_RESULT.
49 //
50 // *********************************************************************
51 // ___________
52 // Function: hypotl(x,y) = |(x^2 + y^2) = for double extended values
53 // x and y
54 // Also provides cabsl functionality.
55 //
56 // *********************************************************************
57 //
58 // Resources Used:
59 //
60 // Floating-Point Registers: f8 (Input and Return Value)
61 // f9 (Input)
62 // f6 -f15, f32-f34
63 //
64 // General Purpose Registers:
65 // r2-r3 (Scratch)
66 // r32-r36 (Locals)
67 // r37-r40 (Used to pass arguments to error handling routine)
68 //
69 // Predicate Registers: p6 - p10
70 //
71 // *********************************************************************
72 //
73 // IEEE Special Conditions:
74 //
75 // All faults and exceptions should be raised correctly.
76 // Overflow can occur.
77 // hypotl(Infinity and anything) = +Infinity
78 // hypotl(QNaN and anything) = QNaN
79 // hypotl(SNaN and anything ) = QNaN
80 //
81 // *********************************************************************
82 //
83 // Implementation:
84 // x2 = x * x in double-extended
85 // y2 = y * y in double-extended
86 // temp = x2 + y2 in double-extended
87 // sqrt(temp) rounded to double extended
88 //
89 // *********************************************************************
90
91 #include "libm_support.h"
92
93 GR_SAVE_PFS = r33
94 GR_SAVE_B0 = r34
95 GR_SAVE_GP = r35
96 GR_Parameter_X = r36
97 GR_Parameter_Y = r37
98 GR_Parameter_RESULT = r38
99 GR_Parameter_TAG = r39
100
101 FR_X = f32
102 FR_Y = f33
103 FR_RESULT = f8
104
105 .section .text
106 #ifndef _LIBC
107 .proc cabsl#
108 .global cabsl#
109 cabsl:
110 .endp cabsl
111 #endif
112 .proc hypotl#
113 .global hypotl#
114 .align 64
115
116 hypotl:
117 #ifdef _LIBC
118 .global __hypotl
119 __hypotl:
120 .global __ieee754_hypotl
121 __ieee754_hypotl:
122 #endif
123 {.mfi
124 alloc r32= ar.pfs,0,4,4,0
125 // Compute x*x
126 fma.s1 f10=f8,f8,f0
127 // r2=bias-1
128 mov r2=0xfffe
129 }
130 {.mfi
131 nop.m 0
132 // y*y
133 fma.s1 f11=f9,f9,f0
134 nop.i 0;;
135 }
136
137 { .mfi
138 nop.m 0
139 // Check if x is an Inf - if so return Inf even
140 // if y is a NaN (C9X)
141 fclass.m.unc p7, p6 = f8, 0x023
142 nop.i 0
143 }
144 {.mfi
145 nop.m 0
146 // if possible overflow, copy f8 to f32
147 // set Denormal, if necessary
148 // (p8)
149 fma.s0 f32=f8,f1,f0
150 nop.i 0;;
151 }
152 { .mfi
153 nop.m 0
154 // Check if y is an Inf - if so return Inf even
155 // if x is a NaN (C9X)
156 fclass.m.unc p8, p9 = f9, 0x023
157 nop.i 0
158 }
159 { .mfi
160 nop.m 999
161 // For x=inf, multiply y by 1 to raise invalid on y an SNaN
162 // (p7) fma.s0 f9=f9,f1,f0
163 // copy f9 to f33; set Denormal, if necessary
164 fma.s0 f33=f9,f1,f0
165 nop.i 0;;
166 }
167 {.mfi
168 nop.m 0
169 // is y Zero ?
170 (p6) fclass.m p6,p0=f9,0x7
171 nop.i 0;;
172 }
173
174 {.mfi
175 // f7=0.5
176 setf.exp f7=r2
177 // a=x2+y2
178 fma.s1 f12=f10,f1,f11
179 nop.i 0
180 }
181 {.mfi
182 mov r2=0x408c //0000
183 // dx=x*x-x2
184 fms.s1 f13=f8,f8,f10
185 nop.i 0;;
186 }
187 {.mfi
188 nop.m 0
189 // is x Zero ?
190 (p9) fclass.m p9,p0=f8,0x7
191 shl r2=r2,16
192 }
193 {.mfi
194 nop.m 0
195 // dy=y*y-y2
196 fms.s1 f14=f9,f9,f11
197 nop.i 0;;
198 }
199
200 {.mfi
201 nop.m 0
202 // x not NaN ?
203 (p6) fclass.m p7,p0=f8,0x3f
204 nop.i 0
205 }
206 {.mfi
207 nop.m 0
208 // f6=2
209 fma.s1 f6=f1,f1,f1
210 nop.i 0;;
211 }
212
213 {.mfi
214 nop.m 0
215 // f34=min(x2,y2)
216 famin.s1 f34=f10,f11
217 nop.i 0
218 }
219 {.mfb
220 nop.m 0
221 // f10=max(x2,y2)
222 famax.s1 f10=f11,f10
223 nop.b 0;; //
224 }
225
226 {.mfi
227 nop.m 0
228 // y not NaN ?
229 (p9) fclass.m p8,p0=f9,0x3f
230 nop.i 0;;
231 }
232 {.mfb
233 // f9=35/8
234 setf.s f9=r2
235 // if f8=Infinity or f9=Zero, return |f8|
236 (p7) fmerge.s f8=f0,f32
237 (p7) br.ret.spnt b0;;
238 }
239
240
241 {.mfi
242 nop.m 0
243 // z0=frsqrta(a)
244 frsqrta.s1 f8,p6=f12
245 nop.i 0;;
246 }
247 { .mfi
248 nop.m 0
249 // Identify Natvals, Infs, NaNs, and Zeros
250 // and return result
251 fclass.m.unc p7, p0 = f12, 0x1E7
252 nop.i 0
253 }
254 {.mfi
255 // get exponent of x^2+y^2
256 getf.exp r3=f12
257 // dxy=dx+dy
258 fma.s1 f13=f13,f1,f14
259 nop.i 0;;
260 }
261
262 {.mfb
263 // 2*emax-2
264 mov r2=0x17ffb
265 // if f9=Infinity or f8=Zero, return |f9|
266 (p8) fmerge.s f8=f0,f33
267 (p8) br.ret.spnt b0
268 }
269 {.mfi
270 nop.m 0
271 // dd=a-max(x2,y2)
272 fnma.s1 f10=f10,f1,f12
273 nop.i 0;;
274 }
275
276 {.mfi
277 nop.m 0
278 // S0=a*z0
279 (p6) fma.s1 f14=f12,f8,f0
280 nop.i 0
281 }
282 {.mfi
283 nop.m 0
284 // H0=0.5*z0
285 (p6) fma.s1 f15=f8,f7,f0
286 nop.i 0;;
287 }
288
289 {.mfb
290 nop.m 0
291 // if special case, set f8
292 (p7) mov f8=f12
293 (p7) br.ret.spnt b0
294 }
295 {.mfi
296 nop.m 0
297 // da=min(x2,y2)-dd
298 fnma.s1 f10=f10,f1,f34
299 nop.i 0;;
300 }
301 {.mfi
302 nop.m 0
303 // f6=5/2
304 fma.s1 f6=f7,f1,f6
305 nop.i 0
306 }
307 {.mfi
308 nop.m 0
309 // f11=3/2
310 fma.s1 f11=f7,f1,f1
311 nop.i 0;;
312 }
313
314 {.mfi
315 nop.m 0
316 // d=0.5-S0*H0
317 (p6) fnma.s1 f7=f14,f15,f7
318 nop.i 0;;
319 }
320
321 {.mfi
322 nop.m 0
323 // P1=3/2*d+1
324 (p6) fma.s1 f11=f11,f7,f1
325 nop.i 0
326 }
327 {.mfi
328 nop.m 0
329 // P2=35/8*d+5/2
330 (p6) fma.s1 f9=f9,f7,f6
331 nop.i 0;;
332 }
333 {.mfi
334 nop.m 0
335 // d2=d*d
336 (p6) fma.s1 f34=f7,f7,f0
337 nop.i 0;;
338 }
339
340 {.mfi
341 nop.m 0
342 // T0=d*S0
343 (p6) fma.s1 f6=f7,f14,f0
344 nop.i 0
345 }
346 {.mfi
347 nop.m 0
348 // G0=d*H0
349 (p6) fma.s1 f7=f7,f15,f0
350 nop.i 0;;
351 }
352 {.mfi
353 nop.m 0
354 // P=d2*P2+P1
355 (p6) fma.s1 f11=f34,f9,f11
356 nop.i 0;;
357 }
358
359 {.mfi
360 nop.m 0
361 // S1=p*T0+S0
362 (p6) fma.s1 f14=f11,f6,f14
363 nop.i 0
364 }
365 {.mfi
366 nop.m 0
367 // H1=p*G0+H0
368 (p6) fma.s1 f15=f11,f7,f15
369 nop.i 0;;
370 }
371
372
373 {.mfi
374 nop.m 0
375 // e1=a-S1*S1
376 (p6) fnma.s1 f7=f14,f14,f12
377 nop.i 0
378 }
379 {.mfi
380 // Is x^2 + y^2 well less than the overflow
381 // threshold?
382 (p6) cmp.lt.unc p7, p8 = r3,r2
383 // c=dxy+da
384 (p6) fma.s1 f13=f13,f1,f10
385 nop.i 0;;
386 }
387
388 {.mfi
389 nop.m 0
390 // e=e1+c
391 (p6) fma.s1 f13=f7,f1,f13
392 nop.i 0;;
393 }
394
395 {.mfb
396 nop.m 0
397 // S=e*H1+S1
398 fma.s0 f8=f13,f15,f14
399 // No overflow in this case
400 (p7) br.ret.sptk b0;;
401 }
402
403 { .mfi
404 nop.m 0
405 (p8) fsetc.s2 0x7F,0x42
406 // Possible overflow path, must detect by
407 // Setting widest range exponent with prevailing
408 // rounding mode.
409 nop.i 0 ;;
410 }
411
412
413 { .mfi
414 // bias+0x4000 (bias+EMAX+1)
415 (p8) mov r2=0x13fff
416 // S=e*H1+S1
417 (p8) fma.s2 f12=f13,f15,f14
418 nop.i 0 ;;
419 }
420 { .mfi
421 (p8) setf.exp f11 = r2
422 (p8) fsetc.s2 0x7F,0x40
423 // Restore Original Mode in S2
424 nop.i 0 ;;
425 }
426 { .mfi
427 nop.m 0
428 (p8) fcmp.lt.unc.s1 p9, p10 = f12, f11
429 nop.i 0 ;;
430 }
431 { .mib
432 nop.m 0
433 mov GR_Parameter_TAG = 45;
434 // No overflow
435 (p9) br.ret.sptk b0;;
436 }
437 .endp hypotl
438 ASM_SIZE_DIRECTIVE(hypotl)
439
440 .proc __libm_error_region
441 __libm_error_region:
442 .prologue
443 { .mfi
444 add GR_Parameter_Y=-32,sp // Parameter 2 value
445 nop.f 0
446 .save ar.pfs,GR_SAVE_PFS
447 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
448 }
449 { .mfi
450 .fframe 64
451 add sp=-64,sp // Create new stack
452 nop.f 0
453 mov GR_SAVE_GP=gp // Save gp
454 };;
455 { .mmi
456 stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
457 add GR_Parameter_X = 16,sp // Parameter 1 address
458 .save b0, GR_SAVE_B0
459 mov GR_SAVE_B0=b0 // Save b0
460 };;
461 .body
462 { .mib
463 stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
464 add GR_Parameter_RESULT = 0,GR_Parameter_Y
465 nop.b 0 // Parameter 3 address
466 }
467 { .mib
468 stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
469 add GR_Parameter_Y = -16,GR_Parameter_Y
470 br.call.sptk b0=__libm_error_support# // Call error handling function
471 };;
472 { .mmi
473 nop.m 0
474 nop.m 0
475 add GR_Parameter_RESULT = 48,sp
476 };;
477 { .mmi
478 ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
479 .restore sp
480 add sp = 64,sp // Restore stack pointer
481 mov b0 = GR_SAVE_B0 // Restore return address
482 };;
483 { .mib
484 mov gp = GR_SAVE_GP // Restore gp
485 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
486 br.ret.sptk b0 // Return
487 };;
488 .endp __libm_error_region
489 ASM_SIZE_DIRECTIVE(__libm_error_region)
490 .type __libm_error_support#,@function
491 .global __libm_error_support#