]>
Commit | Line | Data |
---|---|---|
aece054b UD |
1 | /* sqrtf function. PowerPC32 version. |
2 | Copyright (C) 2007 Free Software Foundation, Inc. | |
3 | This file is part of the GNU C Library. | |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
16 | License along with the GNU C Library; if not, write to the Free | |
17 | Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA | |
18 | 02110-1301 USA. */ | |
19 | ||
20 | #include <sysdep.h> | |
21 | #include <math_ldbl_opt.h> | |
22 | ||
23 | /* float [fp1] sqrtf (float x [fp1]) |
24 | Power4 (ISA V2.0) and above implement sqrt in hardware (not optional). | |
25 | The fsqrts instruction generates the correct value for all inputs and | |
26 | sets the appropriate floating point exceptions. Extended checking is | |
27 | only needed to set errno (via __kernel_standard) if the input value | |
28 | is negative. | |
29 | ||
30 | So compare the input value against the absolute value of itself. | |
31 | This will compare equal unless the value is negative (EDOM) or a NAN, | |
32 | in which case we branch to the extended wrapper. If equal we can return | |
33 | the result directly. | |
34 | ||
35 | This part of the function looks like a leaf routine, so no need to | |
36 | stack a frame or execute prologue/epilogue code. It is safe to | |
37 | branch directly to w_sqrt as long as the input value (f1) is | |
38 | preserved. Putting the sqrt result into f2 (float parameter 2) | |
39 | allows passing both the input value and sqrt result into the extended | |
40 | wrapper so there is no need to recompute. | |
41 | ||
42 | This tactic avoids the overhead of stacking a frame for the normal | |
43 | (non-error) case. Until gcc supports prologue shrink-wrapping | |
44 | this is the best we can do. */ | |
45 | ||
46 | .section ".text" | |
47 | .machine power4 | |
48 | EALIGN (__sqrtf, 5, 0) /* __sqrtf: x arrives in fp1, result is returned in fp1 */ | |
49 | fabs fp0,fp1 /* fp0 = fabs (x) */ | |
50 | fsqrts fp2,fp1 /* fp2 = single-precision sqrt of x; x itself stays in fp1 */ | |
51 | fcmpu cr1,fp0,fp1 /* cr1 = fabs (x) compared with x; not equal for negative x or NaN */ | |
52 | bne- cr1,.Lw_sqrtf /* unlikely: negative or NaN input, take the extended path */ | |
53 | fmr fp1,fp2 /* common case: move the sqrt result into the return register */ | |
54 | blr /* return without ever building a stack frame */ | |
55 | .align 4 | |
56 | .Lw_sqrtf: /* extended path: needs a frame because it may call out */ | |
57 | mflr r0 /* r0 = return address from LR */ | |
58 | stwu r1,-16(r1) /* push a 16-byte stack frame */ | |
59 | cfi_adjust_cfa_offset(16) | |
60 | fmr fp12,fp2 /* fp12 = z, the sqrt result; .L4 returns whatever is in fp12 */ | |
61 | stw r0,20(r1) /* save LR at 20(r1), i.e. 4 bytes above the frame just pushed */ | |
62 | stw r30,8(r1) /* save r30; the SHARED code below uses it as the GOT base */ | |
3f241d75 AS |
63 | cfi_offset(lr,20-16) |
64 | cfi_offset(r30,8-16) | |
aece054b | 65 | #ifdef SHARED |
aece054b UD |
66 | bcl 20,31,.LCF1 /* branch-and-link to the next instruction so LR = address of .LCF1 */ |
67 | .LCF1: | |
68 | mflr r30 /* r30 = address of .LCF1 */ | |
69 | addis r30,r30,_GLOBAL_OFFSET_TABLE_-.LCF1@ha /* add high part of the GOT offset from .LCF1 */ | |
70 | addi r30,r30,_GLOBAL_OFFSET_TABLE_-.LCF1@l /* add low part: r30 = _GLOBAL_OFFSET_TABLE_ */ | |
71 | lwz r9,_LIB_VERSION@got(30) /* r9 = address of _LIB_VERSION, read from its GOT slot */ | |
72 | lwz r0,0(r9) /* r0 = _LIB_VERSION */ | |
aece054b UD |
73 | #else |
74 | lis r9,_LIB_VERSION@ha /* r9 = high part of the address of _LIB_VERSION */ | |
75 | lwz r0,_LIB_VERSION@l(r9) /* r0 = _LIB_VERSION */ | |
76 | #endif | |
77 | /* if (_LIB_VERSION == _IEEE_) return z; */ | |
78 | cmpwi cr7,r0,-1 /* -1 is the value tested as _IEEE_ here */ | |
79 | beq- cr7,.L4 | |
80 | /* if (x != x) return z; i.e. x is a NaN, so there is no error to report */ | |
81 | fcmpu cr7,fp1,fp1 /* x compares equal to itself unless it is a NaN */ | |
82 | bne- cr7,.L4 | |
83 | /* if (x < 0.0) | |
84 | return __kernel_standard (x, x, 126) */ | |
85 | fmr fp2,fp1 /* second argument = x; the first argument, x, is already in fp1 */ | |
86 | li r3,126 /* third argument = 126, the type code from the call shown above */ | |
87 | bne- cr1,.L11 /* cr1 still holds the entry comparison of fabs (x) with x */ | |
88 | .L4: /* common exit: restore registers, pop the frame, return fp12 */ | |
89 | lwz r0,20(r1) /* r0 = saved return address */ | |
90 | fmr fp1,fp12 /* return value = fp12 */ | |
91 | lwz r30,8(r1) /* restore the caller's r30 */ | |
92 | addi r1,r1,16 /* pop the 16-byte frame */ | |
93 | mtlr r0 /* LR = saved return address */ | |
94 | blr | |
95 | .L11: /* error case: let __kernel_standard produce the result */ | |
96 | bl __kernel_standard@plt | |
97 | fmr fp12,fp1 /* fp12 = value returned by __kernel_standard, returned via .L4 */ | |
98 | b .L4 | |
99 | END (__sqrtf) | |
100 | ||
101 | weak_alias (__sqrtf, sqrtf) | |
102 |