]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/ia64/fpu/e_remainderl.S
(CFLAGS-tst-align.c): Add -mpreferred-stack-boundary=4.
[thirdparty/glibc.git] / sysdeps / ia64 / fpu / e_remainderl.S
CommitLineData
a334319f
UD
1.file "remainderl.asm"
2// Copyright (C) 2000, 2001, Intel Corporation
8da2915d
UD
3// All rights reserved.
4//
a334319f
UD
5// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
6// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
7// Software Lab, Intel Corporation.
8da2915d 8//
aeb25823
AJ
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// * Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// * Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// * The name of Intel Corporation may not be used to endorse or promote
21// products derived from this software without specific prior written
22// permission.
a334319f 23//
8da2915d
UD
24// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
28// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
30// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
32// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
33// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35//
36// Intel Corporation is the author of this code, and requests that all
37// problem reports or change requests be submitted to it directly at
a334319f 38// http://developer.intel.com/opensource.
8da2915d
UD
39//
40// History
41//====================================================================
a334319f
UD
42// 2/02/00 Initial version
43// 3/02/00 New algorithm
44// 4/04/00 Unwind support added
45// 7/21/00 Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
46// 8/15/00 Bundle added after call to __libm_error_support to properly
8da2915d 47// set [the previously overwritten] GR_Parameter_RESULT.
a334319f 48//11/29/00 Set FR_Y to f9
8da2915d
UD
49//
50// API
51//====================================================================
52// long double remainderl(long double,long double);
53//
54// Overview of operation
55//====================================================================
56// remainder(a,b)=a-i*b,
57// where i is an integer such that, if b!=0 and a is finite,
58// |a/b-i|<=1/2. If |a/b-i|=1/2, i is even.
59//
60// Algorithm
61//====================================================================
62// a). eliminate special cases
63// b). if |a/b|<0.25 (first quotient estimate), return a
64// c). use single precision divide algorithm to get quotient q
65// rounded to 24 bits of precision
66// d). calculate partial remainders (using both q and q-ulp);
67// select one and RZ(a/b) based on the sign of |a|-|b|*q
68// e). if the exponent difference (exponent(a)-exponent(b))
69// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b)
70// and sticky bits to round to integer; exit loop and
71// calculate final remainder
72// f). if exponent(a)-exponent(b)>=24, select new value of a as
73// the partial remainder calculated using RZ(a/b);
74// repeat from c).
75//
76// Special cases
77//====================================================================
78// a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
79// a=NaN or b=NaN: return NaN
a334319f
UD
80
81#include "libm_support.h"
82
8da2915d
UD
83//
84// Registers used
85//====================================================================
86// Predicate registers: p6-p14
87// General registers: r2,r3,r28,r29,r32 (ar.pfs), r33-r39
88// Floating point registers: f6-f15,f32
89//
a334319f
UD
90.section .text
91
8da2915d
UD
92
93GR_SAVE_B0 = r33
94GR_SAVE_PFS = r34
95GR_SAVE_GP = r35
96GR_SAVE_SP = r36
97
98GR_Parameter_X = r37
99GR_Parameter_Y = r38
100GR_Parameter_RESULT = r39
101GR_Parameter_TAG = r40
102
103FR_X = f10
104FR_Y = f9
105FR_RESULT = f8
106
107
108
a334319f
UD
109
110 .proc remainderl#
111 .align 32
112 .global remainderl#
113 .align 32
114
115remainderl:
116#ifdef _LIBC
117.global __remainderl
118.type __remainderl,@function
119__remainderl:
120#endif
8da2915d
UD
121// inputs in f8, f9
122// result in f8
123
124{ .mfi
125 alloc r32=ar.pfs,1,4,4,0
126 // f13=|a|
127 fmerge.s f13=f0,f8
128 nop.i 0
129}
130 {.mfi
131 getf.sig r29=f9
132 // f14=|b|
133 fmerge.s f14=f0,f9
134 nop.i 0;;
135}
136 {.mlx
137 mov r28=0x2ffdd
138 // r2=2^{23}
139 movl r3=0x4b000000;;
140}
141
142
143{.mmi
144setf.exp f32=r28
145nop.m 0
146// y pseudo-zero ?
147cmp.eq p11,p10=r29,r0;;
148}
149
150// Y +-NAN, +-inf, +-0? p11
151{ .mfi
152 nop.m 999
153(p10) fclass.m p11,p10 = f9, 0xe7
154 nop.i 999
155}
156// qnan snan inf norm unorm 0 -+
157// 1 1 1 0 0 0 11
158// e 3
159// X +-NAN, +-inf, ? p9
160{ .mfi
161 nop.m 999
a334319f 162(p0) fclass.m.unc p9,p8 = f8, 0xe3
8da2915d
UD
163 nop.i 999;;
164}
165
166{.mfi
167 nop.m 0
168 mov f12=f0
169 nop.i 0
170}
171{ .mfi
172 // set p7=1
173 cmp.eq.unc p7,p0=r0,r0
174 // Step (1)
175 // y0 = 1 / b in f10
176 frcpa.s1 f10,p6=f13,f14
177 nop.i 0;;
178}
179// Y +-NAN, +-inf, +-0? p11
180{ .mfi
181 nop.m 999
182 // pseudo-NaN ?
183(p10) fclass.nm p11,p0 = f9, 0xff
184 nop.i 999
185}
186
187// qnan snan inf norm unorm 0 -+
188// 1 1 1 0 0 0 11
189// e 3
190// X +-NAN, +-inf, ? p9
191
192{ .mfi
193 nop.m 999
194(p8) fclass.nm p9,p0 = f8, 0xff
195 nop.i 999;;
196}
197
198{.bbb
a334319f
UD
199 (p9) br.cond.spnt L(FREM_X_NAN_INF)
200 (p11) br.cond.spnt L(FREM_Y_NAN_INF_ZERO)
8da2915d
UD
201 nop.b 0
202} {.mfi
203 nop.m 0
204 // set D flag if a (f8) is denormal
205 fnma.s0 f6=f8,f1,f8
206 nop.i 0;;
207}
208
a334319f 209L(remloop24):
8da2915d
UD
210 { .mfi
211 nop.m 0
212 // Step (2)
213 // q0 = a * y0 in f15
214 (p6) fma.s1 f12=f13,f10,f0
215 nop.i 0
216} { .mfi
217 nop.m 0
218 // Step (3)
219 // e0 = 1 - b * y0 in f7
220 (p6) fnma.s1 f7=f14,f10,f1
221 nop.i 0;;
222} {.mlx
223 nop.m 0
224 // r2=1.25*2^{-24}
225 movl r2=0x33a00000;;
226}
227
228{.mfi
229 nop.m 0
230 // q1=q0*(1+e0)
a334319f 231 fma.s1 f15=f12,f7,f12
8da2915d
UD
232 nop.i 0
233}
234{ .mfi
235 nop.m 0
236 // Step (4)
237 // e1 = e0 * e0 + E in f7
238 (p6) fma.s1 f7=f7,f7,f32
239 nop.i 0;;
240}
241 {.mii
242 (p7) getf.exp r29=f12
243 (p7) mov r28=0xfffd
244 nop.i 0;;
245}
246
247 { .mfi
248 // f12=2^{23}
249 setf.s f12=r3
250 // Step (5)
251 // q2 = q1 + e1 * q1 in f11
252 (p6) fma.s.s1 f11=f7,f15,f15
253 nop.i 0
254} { .mfi
255 nop.m 0
256 // Step (6)
257 // q2 = q1 + e1 * q1 in f6
258 (p6) fma.s1 f6=f7,f15,f15
259 nop.i 0;;
260}
261
262 {.mmi
263 // f15=1.25*2^{-24}
264 setf.s f15=r2
265 // q<1/4 ? (i.e. expon< -2)
266 (p7) cmp.gt p7,p0=r28,r29
267 nop.i 0;;
268}
269
270{.mfb
271 // r29= -32+bias
272 mov r29=0xffdf
273 // if |a/b|<1/4, set D flag before returning
274 (p7) fma.s0 f9=f9,f0,f8
275 nop.b 0;;
276}
277 {.mfb
278 nop.m 0
279 // can be combined with bundle above if sign of 0 or
280 // FTZ enabled are not important
281 (p7) fmerge.s f8=f8,f9
282 // return if |a|<4*|b| (estimated quotient < 1/4)
283 (p7) br.ret.spnt b0;;
284}
285 {.mfi
286 // f7=2^{-32}
287 setf.exp f7=r29
288 // set f8 to current a value | sign
289 fmerge.s f8=f8,f13
290 nop.i 0;;
291}
292 {.mfi
293 getf.exp r28=f6
294 // last step ? (q<2^{23})
295 fcmp.lt.unc.s1 p0,p12=f6,f12
296 nop.i 0;;
297}
298 {.mfi
299 nop.m 0
300 // r=a-b*q
301 fnma.s1 f6=f14,f11,f13
302 nop.i 0
303} {.mfi
304 // r2=23+bias
305 mov r2=0xffff+23
306 // q'=q-q*(1.25*2^{-24}) (q'=q-ulp)
307 fnma.s.s1 f15=f11,f15,f11
308 nop.i 0;;
309}
310 {.mmi
311 nop.m 0
312 cmp.eq p11,p14=r2,r28
313 nop.i 0;;
314}
315
316.pred.rel "mutex",p11,p14
317 {.mfi
318 nop.m 0
319 // if exp_q=2^23, then r=a-b*2^{23}
320 (p11) fnma.s1 f13=f12,f14,f13
321 nop.i 0
322}
323{.mfi
324 nop.m 0
325 // r2=a-b*q'
326 (p14) fnma.s1 f13=f14,f15,f13
327 nop.i 0;;
328}
329 {.mfi
330 nop.m 0
331 // r>0 iff q=RZ(a/b) and inexact
332 fcmp.gt.unc.s1 p8,p0=f6,f0
333 nop.i 0
334} {.mfi
335 nop.m 0
336 // r<0 iff q'=RZ(a/b) and inexact
337 (p14) fcmp.lt.unc.s1 p9,p10=f6,f0
338 nop.i 0;;
339}
340
341.pred.rel "mutex",p8,p9
342 {.mfi
343 nop.m 0
344 // (p8) Q=q+(last iteration ? sticky bits:0)
345 // i.e. Q=q+q*x (x=2^{-32} or 0)
346 (p8) fma.s1 f11=f11,f7,f11
347 nop.i 0
348} {.mfi
349 nop.m 0
350 // (p9) Q=q'+(last iteration ? sticky bits:0)
351 // i.e. Q=q'+q'*x (x=2^{-32} or 0)
352 (p9) fma.s1 f11=f15,f7,f15
353 nop.i 0;;
354}
355
356 {.mfb
357 nop.m 0
358 // (p9) set r=r2 (new a, if not last iteration)
359 // (p10) new a =r
360 (p10) mov f13=f6
a334319f 361 (p12) br.cond.sptk L(remloop24);;
8da2915d
UD
362}
363
364// last iteration
365 {.mfi
366 nop.m 0
367 // set f9=|b|*sgn(a)
368 fmerge.s f9=f8,f9
369 nop.i 0
370}
371 {.mfi
372 nop.m 0
373 // round to integer
374 fcvt.fx.s1 f11=f11
375 nop.i 0;;
376}
377 {.mfi
378 nop.m 0
379 // save sign of a
380 fmerge.s f7=f8,f8
381 nop.i 0
382} {.mfi
383 nop.m 0
384 // normalize
385 fcvt.xf f11=f11
386 nop.i 0;;
387}
388 {.mfi
389 nop.m 0
390 // This can be removed if sign of 0 is not important
391 // get remainder using sf1
392 fnma.s1 f12=f9,f11,f8
393 nop.i 0
394}
395 {.mfi
396 nop.m 0
397 // get remainder
398 fnma.s0 f8=f9,f11,f8
399 nop.i 0;;
400}
401 {.mfi
402 nop.m 0
403 // f12=0?
404 // This can be removed if sign of 0 is not important
405 fcmp.eq.unc.s1 p8,p0=f12,f0
406 nop.i 0;;
407}
408 {.mfb
409 nop.m 0
410 // if f8=0, set sign correctly
411 // This can be removed if sign of 0 is not important
412 (p8) fmerge.s f8=f7,f8
413 // return
414 br.ret.sptk b0;;
415}
416
417
418
a334319f 419L(FREM_X_NAN_INF):
8da2915d
UD
420
421// Y zero ?
422{.mfi
423 nop.m 0
424 fma.s1 f10=f9,f1,f0
425 nop.i 0;;
426}
427{.mfi
428 nop.m 0
429 fcmp.eq.unc.s1 p11,p0=f10,f0
430 nop.i 0;;
431}
432{.mib
433 nop.m 0
434 nop.i 0
435 // if Y zero
a334319f 436 (p11) br.cond.spnt L(FREM_Y_ZERO);;
8da2915d
UD
437}
438
439// X infinity? Return QNAN indefinite
440{ .mfi
441 nop.m 999
a334319f 442(p0) fclass.m.unc p8,p0 = f8, 0x23
8da2915d
UD
443 nop.i 999
444}
445// X infinity? Return QNAN indefinite
446{ .mfi
447 nop.m 999
a334319f 448(p0) fclass.m.unc p11,p0 = f8, 0x23
8da2915d
UD
449 nop.i 999;;
450}
451// Y NaN ?
452{.mfi
453 nop.m 999
454(p8) fclass.m.unc p0,p8=f9,0xc3
455 nop.i 0;;
456}
457{.mfi
458 nop.m 999
459 // also set Denormal flag if necessary
460(p8) fnma.s0 f9=f9,f1,f9
461 nop.i 0
462}
463{ .mfi
464 nop.m 999
465(p8) frcpa.s0 f8,p7 = f8,f8
466 nop.i 999 ;;
467}
468
469{.mfi
470 nop.m 999
471(p11) mov f10=f8
472 nop.i 0
473}
474{ .mfi
475 nop.m 999
a334319f 476(p8) fma f8=f8,f1,f0
8da2915d
UD
477 nop.i 0 ;;
478}
479
480{ .mfb
481 nop.m 999
482 frcpa.s0 f8,p7=f8,f9
a334319f 483 (p11) br.cond.spnt L(EXP_ERROR_RETURN);;
8da2915d
UD
484}
485{ .mib
486 nop.m 0
487 nop.i 0
488 br.ret.spnt b0 ;;
489}
490
491
a334319f 492L(FREM_Y_NAN_INF_ZERO):
8da2915d
UD
493// Y INF
494{ .mfi
495 nop.m 999
a334319f 496(p0) fclass.m.unc p7,p0 = f9, 0x23
8da2915d
UD
497 nop.i 999 ;;
498}
499
500{ .mfb
501 nop.m 999
a334319f 502(p7) fma f8=f8,f1,f0
8da2915d
UD
503(p7) br.ret.spnt b0 ;;
504}
505
506// Y NAN?
507{ .mfi
508 nop.m 999
a334319f 509(p0) fclass.m.unc p9,p10 = f9, 0xc3
8da2915d
UD
510 nop.i 999 ;;
511}
512{ .mfi
513 nop.m 999
514(p10) fclass.nm p9,p0 = f9, 0xff
515 nop.i 999 ;;
516}
517
518{ .mfb
519 nop.m 999
a334319f 520(p9) fma f8=f9,f1,f0
8da2915d
UD
521(p9) br.ret.spnt b0 ;;
522}
523
a334319f 524L(FREM_Y_ZERO):
8da2915d
UD
525// Y zero? Must be zero at this point
526// because it is the only choice left.
527// Return QNAN indefinite
528
529// X NAN?
530{ .mfi
531 nop.m 999
a334319f 532(p0) fclass.m.unc p9,p10 = f8, 0xc3
8da2915d
UD
533 nop.i 999 ;;
534}
535{ .mfi
536 nop.m 999
537(p10) fclass.nm p9,p10 = f8, 0xff
538 nop.i 999 ;;
539}
540
541{.mfi
542 nop.m 999
a334319f 543 (p9) frcpa f11,p7=f8,f0
8da2915d
UD
544 nop.i 0;;
545}
546{ .mfi
547 nop.m 999
a334319f 548(p10) frcpa f11,p7 = f0,f0
8da2915d
UD
549 nop.i 999;;
550}
551
552{ .mfi
553 nop.m 999
a334319f 554(p0) fmerge.s f10 = f8, f8
8da2915d
UD
555 nop.i 999
556}
557
558{ .mfi
559 nop.m 999
a334319f 560(p0) fma f8=f11,f1,f0
8da2915d
UD
561 nop.i 999;;
562}
563
a334319f 564L(EXP_ERROR_RETURN):
8da2915d
UD
565
566{ .mib
a334319f 567(p0) mov GR_Parameter_TAG = 123
8da2915d 568 nop.i 999
a334319f 569(p0) br.sptk __libm_error_region;;
8da2915d
UD
570}
571
a334319f
UD
572.endp remainderl
573ASM_SIZE_DIRECTIVE(remainderl)
574#ifdef _LIBC
575ASM_SIZE_DIRECTIVE(__remainderl)
576#endif
8da2915d 577
a334319f
UD
578.proc __libm_error_region
579__libm_error_region:
8da2915d
UD
580.prologue
581{ .mfi
582 add GR_Parameter_Y=-32,sp // Parameter 2 value
583 nop.f 0
584.save ar.pfs,GR_SAVE_PFS
585 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
586}
587{ .mfi
588.fframe 64
589 add sp=-64,sp // Create new stack
590 nop.f 0
591 mov GR_SAVE_GP=gp // Save gp
592};;
593{ .mmi
594 stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
595 add GR_Parameter_X = 16,sp // Parameter 1 address
596.save b0, GR_SAVE_B0
597 mov GR_SAVE_B0=b0 // Save b0
598};;
599.body
600{ .mib
601 stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
602 add GR_Parameter_RESULT = 0,GR_Parameter_Y
603 nop.b 0 // Parameter 3 address
604}
605{ .mib
606 stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
607 add GR_Parameter_Y = -16,GR_Parameter_Y
608 br.call.sptk b0=__libm_error_support# // Call error handling function
609};;
610{ .mmi
611 nop.m 0
612 nop.m 0
613 add GR_Parameter_RESULT = 48,sp
614};;
615{ .mmi
616 ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
617.restore sp
618 add sp = 64,sp // Restore stack pointer
619 mov b0 = GR_SAVE_B0 // Restore return address
620};;
621{ .mib
622 mov gp = GR_SAVE_GP // Restore gp
623 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
624 br.ret.sptk b0 // Return
625};;
626
a334319f
UD
627.endp __libm_error_region
628ASM_SIZE_DIRECTIVE(__libm_error_region)
8da2915d
UD
629
630
631.type __libm_error_support#,@function
632.global __libm_error_support#