]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/ia64/fpu/e_remainder.S
Remove "Contributed by" lines
[thirdparty/glibc.git] / sysdeps / ia64 / fpu / e_remainder.S
CommitLineData
d5efd131
MF
1.file "remainder.s"
2
3
4// Copyright (c) 2000 - 2003, Intel Corporation
5// All rights reserved.
6//
d5efd131
MF
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
11//
12// * Redistributions of source code must retain the above copyright
13// notice, this list of conditions and the following disclaimer.
14//
15// * Redistributions in binary form must reproduce the above copyright
16// notice, this list of conditions and the following disclaimer in the
17// documentation and/or other materials provided with the distribution.
18//
19// * The name of Intel Corporation may not be used to endorse or promote
20// products derived from this software without specific prior written
21// permission.
22
23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
31// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34//
35// Intel Corporation is the author of this code, and requests that all
36// problem reports or change requests be submitted to it directly at
37// http://www.intel.com/software/products/opensource/libraries/num.htm.
38//
39// History
40//====================================================================
41// 02/02/00 Initial version
42// 03/02/00 New Algorithm
43// 04/04/00 Unwind support added
44// 07/21/00 Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
45// 08/15/00 Bundle added after call to __libm_error_support to properly
46// set [the previously overwritten] GR_Parameter_RESULT.
47// 11/29/00 Set FR_Y to f9
48// 05/20/02 Cleaned up namespace and sf0 syntax
49// 02/10/03 Reordered header: .section, .global, .proc, .align
50//
51// API
52//====================================================================
0347518d 53// double remainder(double,double);
d5efd131
MF
54//
55// Overview of operation
56//====================================================================
57// remainder(a,b)=a-i*b,
0347518d 58// where i is an integer such that, if b!=0 and a is finite,
d5efd131
MF
59// |a/b-i|<=1/2. If |a/b-i|=1/2, i is even.
60//
61// Algorithm
62//====================================================================
63// a). eliminate special cases
64// b). if |a/b|<0.25 (first quotient estimate), return a
65// c). use single precision divide algorithm to get quotient q
0347518d
MF
66// rounded to 24 bits of precision
67// d). calculate partial remainders (using both q and q-ulp);
68// select one and RZ(a/b) based on the sign of |a|-|b|*q
d5efd131 69// e). if the exponent difference (exponent(a)-exponent(b))
0347518d 70// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b)
d5efd131
MF
71// and sticky bits to round to integer; exit loop and
72// calculate final remainder
73// f). if exponent(a)-exponent(b)>=24, select new value of a as
0347518d
MF
74// the partial remainder calculated using RZ(a/b);
75// repeat from c).
d5efd131
MF
76//
77// Special cases
78//====================================================================
79// a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
80// a=NaN or b=NaN: return NaN
81
82// Registers used
83//====================================================================
84// Predicate registers: p6-p14
85// General registers: r2,r3,r28,r29,r32 (ar.pfs), r33-r39
86// Floating point registers: f6-f15,f32
87
88GR_SAVE_B0 = r33
89GR_SAVE_PFS = r34
0347518d 90GR_SAVE_GP = r35
d5efd131
MF
91GR_SAVE_SP = r36
92
93GR_Parameter_X = r37
94GR_Parameter_Y = r38
95GR_Parameter_RESULT = r39
96GR_Parameter_TAG = r40
97
98FR_X = f10
99FR_Y = f9
100FR_RESULT = f8
101
102
103.section .text
104GLOBAL_IEEE754_ENTRY(remainder)
105
106// inputs in f8, f9
107// result in f8
108
109{ .mfi
110 alloc r32=ar.pfs,1,4,4,0
111 // f13=|a|
112 fmerge.s f13=f0,f8
113 nop.i 0
114}
115 {.mfi
116 nop.m 0
117 // f14=|b|
118 fmerge.s f14=f0,f9
119 nop.i 0;;
120}
121 {.mlx
122 mov r28=0x2ffdd
123 // r2=2^{23}
124 movl r3=0x4b000000;;
125}
126
127// Y +-NAN, +-inf, +-0? p11
128{ .mfi
129 setf.exp f32=r28
0347518d 130 fclass.m.unc p11,p0 = f9, 0xe7
d5efd131
MF
131 nop.i 999
132}
133// qnan snan inf norm unorm 0 -+
134// 1 1 1 0 0 0 11
135// e 3
136// X +-NAN, +-inf, ? p9
137{ .mfi
138 nop.m 999
0347518d
MF
139 fclass.m.unc p9,p0 = f8, 0xe3
140 nop.i 999;;
d5efd131
MF
141}
142
143{.mfi
144 nop.m 0
145 mov f12=f0
146 nop.i 0
147}
148{ .mfi
149 // set p7=1
150 cmp.eq.unc p7,p0=r0,r0
151 // Step (1)
152 // y0 = 1 / b in f10
153 frcpa.s1 f10,p6=f13,f14
154 nop.i 0;;
0347518d 155}
d5efd131
MF
156
157{.bbb
158 (p9) br.cond.spnt FREM_X_NAN_INF
159 (p11) br.cond.spnt FREM_Y_NAN_INF_ZERO
160 nop.b 0
161} {.mfi
162 nop.m 0
163 // set D flag if a (f8) is denormal
164 fnma.s0 f6=f8,f1,f8
165 nop.i 0;;
0347518d 166}
d5efd131
MF
167
168
0347518d 169remloop24:
d5efd131
MF
170 { .mfi
171 nop.m 0
172 // Step (2)
173 // q0 = a * y0 in f12
174 (p6) fma.s1 f12=f13,f10,f0
175 nop.i 0
176} { .mfi
177 nop.m 0
178 // Step (3)
179 // e0 = 1 - b * y0 in f7
180 (p6) fnma.s1 f7=f14,f10,f1
181 nop.i 0;;
182} {.mlx
183 nop.m 0
184 // r2=1.25*2^{-24}
185 movl r2=0x33a00000;;
0347518d 186}
d5efd131
MF
187
188{.mfi
189 nop.m 0
190 // q1=q0*(1+e0)
191 (p6) fma.s1 f15=f12,f7,f12
192 nop.i 0
193}
194{ .mfi
195 nop.m 0
196 // Step (4)
197 // e1 = e0 * e0 + E in f7
198 (p6) fma.s1 f7=f7,f7,f32
199 nop.i 0;;
200}
201 {.mii
202 (p7) getf.exp r29=f12
203 (p7) mov r28=0xfffd
204 nop.i 0;;
205}
206 { .mfi
207 // f12=2^{23}
208 setf.s f12=r3
209 // Step (5)
210 // q2 = q1 + e1 * q1 in f11
211 (p6) fma.s.s1 f11=f7,f15,f15
212 nop.i 0
213} { .mfi
214 nop.m 0
215 // Step (6)
216 // q2 = q1 + e1 * q1 in f6
217 (p6) fma.s1 f6=f7,f15,f15
218 nop.i 0;;
0347518d 219}
d5efd131
MF
220
221 {.mmi
222 // f15=1.25*2^{-24}
223 setf.s f15=r2
0347518d 224 // q<1/4 ? (i.e. expon< -2)
d5efd131
MF
225 (p7) cmp.gt p7,p0=r28,r29
226 nop.i 0;;
227}
228
229{.mfb
230 // r29= -32+bias
231 mov r29=0xffdf
0347518d 232 // if |a/b|<1/4, set D flag before returning
d5efd131
MF
233 (p7) fma.d.s0 f9=f9,f0,f8
234 nop.b 0;;
235}
236 {.mfb
237 nop.m 0
238 // can be combined with bundle above if sign of 0 or
239 // FTZ enabled are not important
240 (p7) fmerge.s f8=f8,f9
241 // return if |a|<4*|b| (estimated quotient < 1/4)
242 (p7) br.ret.spnt b0;;
243}
244 {.mfi
245 // f7=2^{-32}
246 setf.exp f7=r29
247 // set f8 to current a value | sign
248 fmerge.s f8=f8,f13
249 nop.i 0;;
0347518d 250}
d5efd131
MF
251
252
253 {.mfi
254 getf.exp r28=f6
255 // last step ? (q<2^{23})
256 fcmp.lt.unc.s1 p0,p12=f6,f12
257 nop.i 0;;
258}
259 {.mfi
260 nop.m 0
261 // r=a-b*q
262 fnma.s1 f6=f14,f11,f13
263 nop.i 0
264} {.mfi
265 // r2=23+bias
266 mov r2=0xffff+23
267 // q'=q-q*(1.25*2^{-24}) (q'=q-ulp)
268 fnma.s.s1 f15=f11,f15,f11
269 nop.i 0;;
270}
271 {.mmi
272 nop.m 0
273 cmp.eq p11,p14=r2,r28
274 nop.i 0;;
0347518d 275}
d5efd131
MF
276
277.pred.rel "mutex",p11,p14
278 {.mfi
279 nop.m 0
280 // if exp_q=2^23, then r=a-b*2^{23}
281 (p11) fnma.s1 f13=f12,f14,f13
282 nop.i 0
0347518d 283}
d5efd131
MF
284{.mfi
285 nop.m 0
286 // r2=a-b*q'
287 (p14) fnma.s1 f13=f14,f15,f13
288 nop.i 0;;
289}
290 {.mfi
291 nop.m 0
292 // r>0 iff q=RZ(a/b) and inexact
293 fcmp.gt.unc.s1 p8,p0=f6,f0
294 nop.i 0
295} {.mfi
296 nop.m 0
297 // r<0 iff q'=RZ(a/b) and inexact
298 (p14) fcmp.lt.unc.s1 p9,p10=f6,f0
299 nop.i 0;;
300}
301
302.pred.rel "mutex",p8,p9
303 {.mfi
0347518d 304 nop.m 0
d5efd131
MF
305 // (p8) Q=q+(last iteration ? sticky bits:0)
306 // i.e. Q=q+q*x (x=2^{-32} or 0)
307 (p8) fma.s1 f11=f11,f7,f11
308 nop.i 0
309} {.mfi
310 nop.m 0
311 // (p9) Q=q'+(last iteration ? sticky bits:0)
312 // i.e. Q=q'+q'*x (x=2^{-32} or 0)
313 (p9) fma.s1 f11=f15,f7,f15
314 nop.i 0;;
315}
316
317 {.mfb
318 nop.m 0
319 // (p9) set r=r2 (new a, if not last iteration)
320 // (p10) new a =r
321 (p10) mov f13=f6
322 (p12) br.cond.sptk remloop24;;
0347518d 323}
d5efd131
MF
324
325// last iteration
326 {.mfi
327 nop.m 0
328 // set f9=|b|*sgn(a)
329 fmerge.s f9=f8,f9
330 nop.i 0
331}
332 {.mfi
333 nop.m 0
334 // round to integer
335 fcvt.fx.s1 f11=f11
336 nop.i 0;;
337}
338 {.mfi
339 nop.m 0
340 // save sign of a
341 fmerge.s f7=f8,f8
342 nop.i 0
0347518d 343} {.mfi
d5efd131
MF
344 nop.m 0
345 // normalize
346 fcvt.xf f11=f11
347 nop.i 0;;
0347518d 348}
d5efd131
MF
349 {.mfi
350 nop.m 0
0347518d 351 // This can be removed if sign of 0 is not important
d5efd131
MF
352 // get remainder using sf1
353 fnma.d.s1 f12=f9,f11,f8
354 nop.i 0
355}
356 {.mfi
357 nop.m 0
358 // get remainder
359 fnma.d.s0 f8=f9,f11,f8
360 nop.i 0;;
361}
362 {.mfi
363 nop.m 0
364 // f12=0?
0347518d 365 // This can be removed if sign of 0 is not important
d5efd131
MF
366 fcmp.eq.unc.s1 p8,p0=f12,f0
367 nop.i 0;;
368}
369 {.mfb
370 nop.m 0
371 // if f8=0, set sign correctly
0347518d 372 // This can be removed if sign of 0 is not important
d5efd131
MF
373 (p8) fmerge.s f8=f7,f8
374 // return
375 br.ret.sptk b0;;
376}
377
378
0347518d 379FREM_X_NAN_INF:
d5efd131
MF
380
381// Y zero ?
0347518d 382{.mfi
d5efd131
MF
383 nop.m 0
384 fma.s1 f10=f9,f1,f0
385 nop.i 0;;
386}
387{.mfi
388 nop.m 0
389 fcmp.eq.unc.s1 p11,p0=f10,f0
390 nop.i 0;;
391}
392{.mib
393 nop.m 0
394 nop.i 0
395 // if Y zero
0347518d 396 (p11) br.cond.spnt FREM_Y_ZERO;;
d5efd131
MF
397}
398
399// X infinity? Return QNAN indefinite
400{ .mfi
401 nop.m 999
0347518d 402 fclass.m.unc p8,p0 = f8, 0x23
d5efd131
MF
403 nop.i 999
404}
405// X infinity? Return QNAN indefinite
406{ .mfi
407 nop.m 999
0347518d
MF
408 fclass.m.unc p11,p0 = f8, 0x23
409 nop.i 999;;
d5efd131
MF
410}
411// Y NaN ?
412{.mfi
413 nop.m 999
414(p8) fclass.m.unc p0,p8=f9,0xc3
415 nop.i 0;;
416}
417{.mfi
418 nop.m 999
419 // also set Denormal flag if necessary
420(p8) fma.s0 f9=f9,f1,f0
421 nop.i 0
0347518d 422}
d5efd131
MF
423{ .mfi
424 nop.m 999
0347518d 425(p8) frcpa.s0 f8,p7 = f8,f8
d5efd131
MF
426 nop.i 999 ;;
427}
428
429{.mfi
430 nop.m 999
431(p11) mov f10=f8
432 nop.i 0
433}
434{ .mfi
435 nop.m 999
0347518d
MF
436(p8) fma.d.s0 f8=f8,f1,f0
437 nop.i 0 ;;
d5efd131
MF
438}
439
440{ .mfb
441 nop.m 999
0347518d
MF
442 frcpa.s0 f8,p7=f8,f9
443 (p11) br.cond.spnt EXP_ERROR_RETURN;;
d5efd131
MF
444}
445{ .mib
446 nop.m 0
447 nop.i 0
0347518d 448 br.ret.spnt b0 ;;
d5efd131
MF
449}
450
451
0347518d 452FREM_Y_NAN_INF_ZERO:
d5efd131
MF
453
454// Y INF
455{ .mfi
456 nop.m 999
0347518d 457 fclass.m.unc p7,p0 = f9, 0x23
d5efd131
MF
458 nop.i 999 ;;
459}
460
461{ .mfb
462 nop.m 999
0347518d
MF
463(p7) fma.d.s0 f8=f8,f1,f0
464(p7) br.ret.spnt b0 ;;
d5efd131
MF
465}
466
467// Y NAN?
468{ .mfi
469 nop.m 999
0347518d 470 fclass.m.unc p9,p0 = f9, 0xc3
d5efd131
MF
471 nop.i 999 ;;
472}
473
474{ .mfb
475 nop.m 999
0347518d
MF
476(p9) fma.d.s0 f8=f9,f1,f0
477(p9) br.ret.spnt b0 ;;
d5efd131
MF
478}
479
480FREM_Y_ZERO:
481// Y zero? Must be zero at this point
482// because it is the only choice left.
483// Return QNAN indefinite
484
485// X NAN?
486{ .mfi
487 nop.m 999
0347518d 488 fclass.m.unc p9,p10 = f8, 0xc3
d5efd131
MF
489 nop.i 999 ;;
490}
491{ .mfi
492 nop.m 999
0347518d 493(p10) fclass.nm p9,p10 = f8, 0xff
d5efd131
MF
494 nop.i 999 ;;
495}
496
497{.mfi
498 nop.m 999
499 (p9) frcpa.s0 f11,p7=f8,f0
500 nop.i 0;;
501}
502
503{ .mfi
504 nop.m 999
0347518d
MF
505(p10) frcpa.s0 f11,p7 = f0,f0
506 nop.i 999;;
d5efd131
MF
507}
508
509{ .mfi
510 nop.m 999
0347518d 511 fmerge.s f10 = f8, f8
d5efd131
MF
512 nop.i 999
513}
514
515{ .mfi
516 nop.m 999
0347518d 517 fma.d.s0 f8=f11,f1,f0
d5efd131
MF
518 nop.i 999
519}
520
521
0347518d 522EXP_ERROR_RETURN:
d5efd131
MF
523
524{ .mib
0347518d 525 mov GR_Parameter_TAG = 124
d5efd131 526 nop.i 999
0347518d 527 br.sptk __libm_error_region;;
d5efd131
MF
528}
529
530GLOBAL_IEEE754_END(remainder)
0609ec0a 531libm_alias_double_other (__remainder, remainder)
5ce8f125 532weak_alias (__remainder, drem)
d5efd131
MF
533
534
535
536LOCAL_LIBM_ENTRY(__libm_error_region)
537.prologue
538{ .mfi
539 add GR_Parameter_Y=-32,sp // Parameter 2 value
540 nop.f 0
541.save ar.pfs,GR_SAVE_PFS
0347518d 542 mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
d5efd131
MF
543}
544{ .mfi
0347518d 545.fframe 64
d5efd131
MF
546 add sp=-64,sp // Create new stack
547 nop.f 0
548 mov GR_SAVE_GP=gp // Save gp
549};;
550{ .mmi
551 stfd [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
552 add GR_Parameter_X = 16,sp // Parameter 1 address
0347518d
MF
553.save b0, GR_SAVE_B0
554 mov GR_SAVE_B0=b0 // Save b0
d5efd131
MF
555};;
556.body
557{ .mib
0347518d
MF
558 stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
559 add GR_Parameter_RESULT = 0,GR_Parameter_Y
d5efd131
MF
560 nop.b 0 // Parameter 3 address
561}
562{ .mib
563 stfd [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
0347518d 564 add GR_Parameter_Y = -16,GR_Parameter_Y
d5efd131
MF
565 br.call.sptk b0=__libm_error_support# // Call error handling function
566};;
567{ .mmi
568 nop.m 0
569 nop.m 0
570 add GR_Parameter_RESULT = 48,sp
571};;
572{ .mmi
573 ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
574.restore sp
575 add sp = 64,sp // Restore stack pointer
576 mov b0 = GR_SAVE_B0 // Restore return address
577};;
578{ .mib
0347518d 579 mov gp = GR_SAVE_GP // Restore gp
d5efd131
MF
580 mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
581 br.ret.sptk b0 // Return
0347518d 582};;
d5efd131
MF
583
584LOCAL_LIBM_END(__libm_error_region)
585
586
587
588.type __libm_error_support#,@function
589.global __libm_error_support#