]>
Commit | Line | Data |
---|---|---|
aece054b | 1 | /* sqrtf function. PowerPC32 version. |
bfff8b1b | 2 | Copyright (C) 2007-2017 Free Software Foundation, Inc. |
aece054b UD |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 PE |
16 | License along with the GNU C Library; if not, see |
17 | <http://www.gnu.org/licenses/>. */ | |
aece054b UD |
18 | |
19 | #include <sysdep.h> | |
20 | #include <math_ldbl_opt.h> | |
21 | ||
22 | /* float [fp1] sqrtf (float x [fp1]) | |
23 | Power4 (ISA V2.0) and above implement sqrt in hardware (not optional). | |
24 | The fsqrts instruction generates the correct value for all inputs and | |
2ccdea26 | 25 | sets the appropriate floating point exceptions. Extended checking is |
aece054b UD |
26 | only needed to set errno (via __kernel_standard) if the input value |
27 | is negative. | |
9c84384c | 28 | |
aece054b UD |
29 | So compare the input value against the absolute value of itself. |
30 | This will compare equal unless the value is negative (EDOM) or a NAN, | |
31 | in which case we branch to the extended wrapper. If equal we can return | |
32 | the result directly. | |
9c84384c | 33 | |
aece054b UD |
34 | This part of the function looks like a leaf routine, so no need to |
35 | stack a frame or execute prologue/epilogue code. It is safe to | |
36 | branch directly to .Lw_sqrtf as long as the input value (f1) is | |
ded5b9b7 | 37 | preserved. Putting the sqrt result into f2 (float parameter 2) |
aece054b UD |
38 | allows passing both the input value and sqrt result into the extended |
39 | wrapper so there is no need to recompute. | |
9c84384c | 40 | |
aece054b UD |
41 | This tactic avoids the overhead of stacking a frame for the normal |
42 | (non-error) case. Until gcc supports prologue shrink-wrapping | |
43 | this is the best we can do. */ | |
44 | ||
45 | .section ".text" | |
46 | .machine power4 | |
47 | EALIGN (__sqrtf, 5, 0) /* __sqrtf: x arrives in fp1, result is returned in fp1 */ | |
48 | fabs fp0,fp1 /* fp0 = abs(x) */ | |
49 | fsqrts fp2,fp1 /* fp2 = sqrt result; fp1 still holds x */ | |
50 | fcmpu cr1,fp0,fp1 /* cr1 = compare abs(x) with x */ | |
51 | bne- cr1,.Lw_sqrtf /* not equal: x is negative or a NaN, take the slow path */ | |
52 | fmr fp1,fp2 /* fast path: return the fsqrts result */ | |
53 | blr | |
54 | .align 4 | |
55 | .Lw_sqrtf: /* slow path: build a frame so __kernel_standard can be called */ | |
56 | mflr r0 /* r0 = return address */ | |
57 | stwu r1,-16(r1) /* allocate a 16-byte frame */ | |
58 | cfi_adjust_cfa_offset(16) | |
59 | fmr fp12,fp2 /* keep the sqrt result in fp12; fp2 is reused as an argument below */ | |
60 | stw r0,20(r1) /* save LR at 20(r1), i.e. 4 bytes above the previous r1 */ | |
61 | stw r30,8(r1) /* save r30, which the SHARED path uses as the GOT pointer */ | |
3f241d75 AS |
62 | cfi_offset(lr,20-16) |
63 | cfi_offset(r30,8-16) | |
aece054b | 64 | #ifdef SHARED |
91d2a845 WS |
65 | SETUP_GOT_ACCESS(r30,got_label) |
66 | addis r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@ha | |
67 | addi r30,r30,_GLOBAL_OFFSET_TABLE_-got_label@l | |
aece054b UD |
68 | lwz r9,_LIB_VERSION@got(30) /* r9 = address of _LIB_VERSION from the GOT */ |
69 | lwz r0,0(r9) /* r0 = _LIB_VERSION */ | |
aece054b UD |
70 | #else |
71 | lis r9,_LIB_VERSION@ha /* non-PIC: form the address directly */ | |
72 | lwz r0,_LIB_VERSION@l(r9) /* r0 = _LIB_VERSION */ | |
73 | #endif | |
74 | /* if (_LIB_VERSION == _IEEE_) return z; */ | |
75 | cmpwi cr7,r0,-1 /* -1 is the value tested as _IEEE_ here */ | |
76 | beq- cr7,.L4 /* return the fsqrts result without calling the error handler */ | |
77 | /* if (x != x) return z; i.e. x is a NaN */ | |
78 | fcmpu cr7,fp1,fp1 | |
79 | bne- cr7,.L4 /* x does not compare equal to itself: NaN, return z */ | |
80 | /* if (x < 0.0) | |
81 | return __kernel_standard (x, x, 126) */ | |
82 | fmr fp2,fp1 /* second FP argument = x (first is already in fp1) */ | |
83 | li r3,126 /* integer argument = 126 */ | |
84 | bne- cr1,.L11 /* reuses cr1 from the entry compare; assumes nothing above (incl. SETUP_GOT_ACCESS) clobbers cr1 - TODO confirm */ | |
85 | .L4: /* common epilogue: return the value held in fp12 */ | |
86 | lwz r0,20(r1) /* reload the saved return address */ | |
87 | fmr fp1,fp12 /* fp1 = value to return */ | |
88 | lwz r30,8(r1) /* restore r30 */ | |
89 | addi r1,r1,16 /* pop the frame. NOTE(review): no CFI directive follows this, although .L11 below still runs with the frame live - confirm unwind info is intended */ | |
90 | mtlr r0 | |
91 | blr | |
92 | .L11: | |
93 | bl __kernel_standard@plt /* call the error handler through the PLT */ | |
94 | fmr fp12,fp1 /* its return value becomes the result returned by .L4 */ | |
95 | b .L4 | |
96 | END (__sqrtf) | |
97 | ||
98 | weak_alias (__sqrtf, sqrtf) /* sqrtf is provided as a weak alias of __sqrtf */ |