]>
Commit | Line | Data |
---|---|---|
0ecb606c JJ |
1 | .file "fdim.s" |
2 | ||
3 | ||
4 | // Copyright (c) 2001 - 2003, Intel Corporation | |
5 | // All rights reserved. | |
6 | // | |
7 | // Contributed 2001 by the Intel Numerics Group, Intel Corporation | |
8 | // | |
9 | // Redistribution and use in source and binary forms, with or without | |
10 | // modification, are permitted provided that the following conditions are | |
11 | // met: | |
12 | // | |
13 | // * Redistributions of source code must retain the above copyright | |
14 | // notice, this list of conditions and the following disclaimer. | |
15 | // | |
16 | // * Redistributions in binary form must reproduce the above copyright | |
17 | // notice, this list of conditions and the following disclaimer in the | |
18 | // documentation and/or other materials provided with the distribution. | |
19 | // | |
20 | // * The name of Intel Corporation may not be used to endorse or promote | |
21 | // products derived from this software without specific prior written | |
22 | // permission. | |
23 | ||
24 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
25 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
26 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
27 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS | |
28 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
29 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
30 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
31 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
32 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING | |
33 | // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
34 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
35 | // | |
36 | // Intel Corporation is the author of this code, and requests that all | |
37 | // problem reports or change requests be submitted to it directly at | |
38 | // http://www.intel.com/software/products/opensource/libraries/num.htm. | |
39 | // | |
40 | // History | |
41 | //============================================================== | |
42 | // 06/08/01 Initial version | |
43 | // 08/23/01 Corrected error tag number | |
44 | // 05/20/02 Cleaned up namespace and sf0 syntax | |
45 | // 01/28/03 Improved performance | |
46 | // | |
47 | // API | |
48 | //============================================================== | |
49 | // double fdim( double x, double y ); | |
50 | // input floating point f8, f9 | |
51 | // output floating point f8 | |
52 | // | |
53 | // | |
54 | // Overview of operation | |
55 | //============================================================== | |
56 | // fdim determines the positive difference between the arguments | |
57 | // Result = x - y if x > y | |
58 | // = +0 if x <= y | |
59 | // | |
60 | // Error support is called if x-y overflows for x > y | |
61 | // | |
62 | ||
63 | // Registers used | |
64 | //============================================================== | |
65 | // General purpose registers: r14, r32 - r39 | |
66 | ||
// rExpBig is loaded with the exponent constant 0x103ff in fdim and copied
// into fBig with setf.exp. r32 is the destination of the alloc at
// FDIM_OVERFLOW.
67 | rExpBig = r14 | |
68 | ||
69 | // r33-35 hold b0, gp and ar.pfs while __libm_error_region runs; r36-39 parameters for libm_error_support | |
70 | GR_SAVE_B0 = r33 | |
71 | GR_SAVE_GP = r34 | |
72 | GR_SAVE_PFS = r35 | |
73 | ||
// X, Y and RESULT are set to stack addresses by __libm_error_region;
// TAG is set to the error code at FDIM_OVERFLOW.
74 | GR_Parameter_X = r36 | |
75 | GR_Parameter_Y = r37 | |
76 | GR_Parameter_RESULT = r38 | |
77 | GR_Parameter_TAG = r39 | |
78 | ||
79 | // Floating-point registers: f8 - f12 (f8 = x on entry and the result on return, f9 = y) | |
80 | ||
// f_tmp_result: value stored as parameter 3 for the error call
// fBig:         threshold the result is compared against to detect overflow
// fNormX:       copy of x taken before f8 is overwritten with the result
81 | f_tmp_result = f10 | |
82 | fBig = f11 | |
83 | fNormX = f12 | |
84 | ||
85 | // Predicate registers: p6 - p10 (p6/p7: x <= y or not; p8: take the overflow path; p9/p10: an operand is natval/nan/inf or not) | |
86 | ||
87 | ||
88 | .section .text | |
// fdim(x, y): positive difference of two doubles.
//   In:  f8 = x, f9 = y
//   Out: f8 = x - y when x > y, +0 when x <= y
// The common path returns directly through b0. When neither operand is
// natval/nan/inf and the computed result compares >= fBig, control goes
// to FDIM_OVERFLOW, which prepares the arguments and branches to
// __libm_error_region.
89 | GLOBAL_LIBM_ENTRY(fdim) | |
90 | ||
91 | { .mfi | |
92 | mov rExpBig = 0x103ff // Exponent to indicate overflow (presumably the biased exponent of 2^1024 - TODO confirm) | |
93 | fcmp.le.s1 p6,p7 = f8, f9 // Is x <= y? p6 = yes, p7 = no | |
94 | nop.i 0 | |
95 | } | |
96 | { .mfi | |
97 | nop.m 0 | |
98 | fnorm.s1 fNormX = f8 // Save x: f8 is overwritten with the result below, the error path stores fNormX | |
99 | nop.i 0 | |
100 | } | |
101 | ;; | |
102 | ||
103 | { .mfi | |
104 | setf.exp fBig = rExpBig // Constant to test for overflow (compared with the result below) | |
105 | fcmp.eq.s0 p8,p0 = f8, f9 // Dummy op to set Denormal or Invalid; its p8 is rewritten by the (p10) overflow test or cleared by the (p9) cmp.ne below | |
106 | nop.i 0 | |
107 | } | |
108 | ;; | |
109 | ||
110 | { .mfi | |
111 | nop.m 0 | |
112 | fclass.m p9,p10 = f8, 0x1e3 // Test for x natval, nan, inf: p9 = x is one of these, p10 = it is not | |
113 | nop.i 0 | |
114 | } | |
115 | ;; | |
116 | ||
117 | { .mfi | |
118 | nop.m 0 | |
119 | (p6) fmerge.s f8 = f0, f0 // Result is +0 if x <= y | |
120 | nop.i 0 | |
121 | } | |
122 | { .mfi | |
123 | nop.m 0 | |
124 | (p7) fms.d.s0 f8 = f8, f1, f9 // Result is x - y if x > y | |
125 | nop.i 0 | |
126 | } | |
127 | ;; | |
128 | ||
129 | { .mfi | |
130 | nop.m 0 | |
131 | (p10) fclass.m p9,p10 = f9, 0x1e3 // Test for y natval, nan, inf; skipped (p9 stays set) when x already matched | |
132 | nop.i 0 | |
133 | } | |
134 | ;; | |
135 | ||
136 | { .mfi | |
137 | nop.m 0 | |
138 | (p10) fcmp.ge.s1 p8,p0 = f8, fBig // Test result for overflow, only when neither x nor y is natval, nan, inf | |
139 | nop.i 0 | |
140 | } | |
141 | ;; | |
142 | ||
143 | { .mbb | |
144 | (p9) cmp.ne p8,p0 = r0,r0 // Clear p8 if x or y natval,nan,inf (r0 != r0 is never true) | |
145 | (p8) br.cond.spnt FDIM_OVERFLOW // Branch if result overflows | |
146 | br.ret.sptk b0 // Normal return | |
147 | } | |
148 | ;; | |
149 | ||
150 | ||
151 | // Here if result will overflow | |
// The alloc makes r32-r39 available (r32 receives ar.pfs) so that
// __libm_error_region can use the GR_SAVE_* and GR_Parameter_* registers;
// the operands 2,2,4,0 are presumably inputs, locals, outputs, rotating -
// TODO confirm against the ISA manual.
// NOTE(review): f8 was already overwritten by the (p7) fms above, so the
// fms below computes (x - y) - y rather than x - y. Presumably harmless
// because the first result has already overflowed - confirm.
152 | FDIM_OVERFLOW: | |
153 | { .mfi | |
154 | alloc r32=ar.pfs,2,2,4,0 | |
155 | fms.d.s0 f_tmp_result = f8,f1,f9 // Normalize result force overflow; stored as parameter 3 by the error code | |
156 | nop.i 0 | |
157 | } | |
158 | { .mfb | |
159 | mov GR_Parameter_TAG = 196 // Error code handed to the error support in GR_Parameter_TAG | |
160 | nop.f 0 | |
161 | br.cond.sptk __libm_error_region // Branch to error code | |
162 | } | |
163 | ;; | |
164 | ||
165 | GLOBAL_LIBM_END(fdim) | |
166 | ||
167 | ||
168 | LOCAL_LIBM_ENTRY(__libm_error_region) | |
169 | // Call error support to report possible range error | |
// Reached from FDIM_OVERFLOW with fNormX = x, f9 = y, f_tmp_result = the
// value to report as the result, and GR_Parameter_TAG already set.
// Saves ar.pfs, gp and b0, builds a 64-byte stack frame holding the three
// doubles, calls __libm_error_support, then returns in f8 the double read
// back from the result slot.
// Frame layout relative to the new sp:
//   sp+16  x       (address in GR_Parameter_X)
//   sp+32  y       (address in GR_Parameter_Y)
//   sp+48  result  (address in GR_Parameter_RESULT)
170 | .prologue | |
171 | ||
172 | { .mfi | |
173 | add GR_Parameter_Y=-32,sp // Parameter 2 address: old sp - 32, i.e. new sp + 32 | |
174 | nop.f 0 | |
175 | .save ar.pfs,GR_SAVE_PFS | |
176 | mov GR_SAVE_PFS=ar.pfs // Save ar.pfs | |
177 | } | |
178 | { .mfi | |
179 | .fframe 64 | |
180 | add sp=-64,sp // Create new 64-byte stack frame | |
181 | nop.f 0 | |
182 | mov GR_SAVE_GP=gp // Save gp | |
183 | };; | |
184 | ||
185 | { .mmi | |
186 | stfd [GR_Parameter_Y] = f9,16 // STORE Parameter 2 on stack (sp+32); post-increment leaves GR_Parameter_Y = sp+48 | |
187 | add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16) | |
188 | .save b0, GR_SAVE_B0 | |
189 | mov GR_SAVE_B0=b0 // Save b0 | |
190 | };; | |
191 | ||
192 | .body | |
// GR_Parameter_Y still points at sp+48 here: it is copied to
// GR_Parameter_RESULT, used to store the result, and then moved back by
// -16 so that it addresses y (sp+32) again for the call.
193 | { .mib | |
194 | stfd [GR_Parameter_X] = fNormX // STORE Parameter 1 on stack (sp+16) | |
195 | add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48) | |
196 | nop.b 0 | |
197 | } | |
198 | { .mib | |
199 | stfd [GR_Parameter_Y] = f_tmp_result // STORE Parameter 3 on stack (sp+48) | |
200 | add GR_Parameter_Y = -16,GR_Parameter_Y | |
201 | br.call.sptk b0=__libm_error_support# // Call error handling function | |
202 | };; | |
203 | ||
// Recompute the result slot address (sp+48) after the call; presumably
// needed because r38 is not preserved across br.call - TODO confirm.
204 | { .mmi | |
205 | add GR_Parameter_RESULT = 48,sp | |
206 | nop.m 0 | |
207 | nop.i 0 | |
208 | };; | |
209 | ||
210 | { .mmi | |
211 | ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack (slot at sp+48) | |
212 | .restore sp | |
213 | add sp = 64,sp // Restore stack pointer (drop the 64-byte frame) | |
214 | mov b0 = GR_SAVE_B0 // Restore return address | |
215 | };; | |
216 | ||
217 | { .mib | |
218 | mov gp = GR_SAVE_GP // Restore gp | |
219 | mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs | |
220 | br.ret.sptk b0 // Return | |
221 | };; | |
222 | ||
223 | LOCAL_LIBM_END(__libm_error_region) | |
224 | ||
225 | ||
// __libm_error_support is the target of the br.call in __libm_error_region.
// No definition of it is visible in this file; it is declared here as a
// global function symbol.
226 | .type __libm_error_support#,@function | |
227 | .global __libm_error_support# | |
228 |