]>
Commit | Line | Data |
---|---|---|
8da2915d UD |
1 | //.file "scalbnf.s" |
2 | ||
aeb25823 | 3 | // Copyright (C) 2000, 2001, Intel Corporation |
8da2915d UD |
4 | // All rights reserved. |
5 | // | |
6 | // Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story, | |
7 | // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation. | |
aeb25823 AJ |
8 | // |
9 | // Redistribution and use in source and binary forms, with or without | |
10 | // modification, are permitted provided that the following conditions are | |
11 | // met: | |
12 | // | |
13 | // * Redistributions of source code must retain the above copyright | |
14 | // notice, this list of conditions and the following disclaimer. | |
15 | // | |
16 | // * Redistributions in binary form must reproduce the above copyright | |
17 | // notice, this list of conditions and the following disclaimer in the | |
18 | // documentation and/or other materials provided with the distribution. | |
19 | // | |
20 | // * The name of Intel Corporation may not be used to endorse or promote | |
21 | // products derived from this software without specific prior written | |
22 | // permission. | |
23 | // | |
8da2915d UD |
24 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
25 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
26 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
27 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS | |
28 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
29 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
30 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
31 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
32 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING | |
33 | // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
34 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
35 | // | |
36 | // Intel Corporation is the author of this code, and requests that all | |
37 | // problem reports or change requests be submitted to it directly at | |
38 | // http://developer.intel.com/opensource. | |
39 | // | |
40 | // History | |
41 | //============================================================== | |
42 | // 2/02/00 Initial version | |
43 | // 1/26/01 scalbnf completely reworked and now standalone version | |
44 | // | |
45 | // API | |
46 | //============================================================== | |
47 | // float = scalbnf (float x, int n) | |
48 | // input floating point f8 and int n (r33) | |
49 | // output floating point f8 | |
50 | // | |
51 | // Returns x * 2**n using an fma and detects overflow | |
52 | // and underflow. | |
53 | // | |
54 | // | |
55 | ||
56 | #include "libm_support.h" | |
57 | ||
58 | FR_Big = f6 | |
59 | FR_NBig = f7 | |
60 | FR_Floating_X = f8 | |
61 | FR_Result = f8 | |
62 | FR_Result2 = f9 | |
63 | FR_Result3 = f11 | |
64 | FR_Norm_X = f12 | |
65 | FR_Two_N = f14 | |
66 | FR_Two_to_Big = f15 | |
67 | ||
68 | GR_N_Biased = r15 | |
69 | GR_Big = r16 | |
70 | GR_NBig = r17 | |
71 | GR_Scratch = r18 | |
72 | GR_Scratch1 = r19 | |
73 | GR_Bias = r20 | |
74 | GR_N_as_int = r21 | |
75 | ||
76 | GR_SAVE_B0 = r32 | |
77 | GR_SAVE_GP = r33 | |
78 | GR_SAVE_PFS = r34 | |
79 | GR_Parameter_X = r35 | |
80 | GR_Parameter_Y = r36 | |
81 | GR_Parameter_RESULT = r37 | |
82 | GR_Tag = r38 | |
83 | ||
84 | .align 32 | |
85 | .global scalbnf | |
86 | ||
87 | .section .text | |
88 | .proc scalbnf | |
89 | .align 32 | |
90 | ||
91 | scalbnf: | |
92 | ||
93 | // scalbnf entry: computes x * 2**n in single precision, x in f8, n read from r33 | |
94 | // Is x NaN, Inf or zero (either sign)? Sets p7 if so. | |
95 | // Load the exponent bias 0xFFFF into GR_Bias | |
96 | // | |
97 | { .mfi | |
98 | alloc r32=ar.pfs,1,2,4,0 | |
99 | fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero | |
100 | addl GR_Bias = 0x0FFFF,r0 | |
101 | } | |
102 | ||
103 | // | |
104 | // Sign extend the low 32 bits of n (r33) into GR_N_as_int | |
105 | // Is N zero? (p6) NOTE(review): this tests all 64 bits of r33, not the sign-extended value - confirm intended | |
106 | // Normalize x into FR_Norm_X | |
107 | // | |
108 | { .mfi | |
109 | cmp.eq.unc p6,p0 = r33,r0 | |
110 | fnorm.s1 FR_Norm_X = FR_Floating_X | |
111 | sxt4 GR_N_as_int = r33 | |
112 | } | |
113 | ;; | |
114 | ||
115 | // | |
116 | // Add the bias to N to form the biased exponent of 2**N | |
117 | // If x was NaN, Inf or zero (p7), return x * 1 + 0 rounded to single. | |
118 | // Create -35000 | |
119 | // Create 35000 | |
120 | // | |
121 | { .mfi | |
122 | addl GR_Big = 35000,r0 | |
123 | nop.f 0 | |
124 | add GR_N_Biased = GR_Bias,GR_N_as_int | |
125 | } | |
126 | { .mfb | |
127 | addl GR_NBig = -35000,r0 | |
128 | (p7) fma.s.s0 FR_Result = FR_Floating_X,f1, f0 | |
129 | (p7) br.ret.spnt b0 | |
130 | };; | |
131 | ||
132 | // | |
133 | // Build 2**N from the biased exponent, load exponents 0xFFFF - 40000 and 0xFFFF + 40000 | |
134 | // Return x (as x * 1 + 0) when the N == 0 test set p6 | |
135 | // | |
136 | { .mfi | |
137 | setf.exp FR_Two_N = GR_N_Biased | |
138 | nop.f 0 | |
139 | addl GR_Scratch1 = 0x063BF,r0 | |
140 | } | |
141 | { .mfb | |
142 | addl GR_Scratch = 0x019C3F,r0 | |
143 | (p6) fma.s.s0 FR_Result = FR_Floating_X,f1, f0 | |
144 | (p6) br.ret.spnt b0 | |
145 | };; | |
146 | ||
147 | // | |
148 | // Create 2**big (FR_Big) | |
149 | // Create 2**-big (FR_NBig) | |
150 | // Is N >= 35000 (p6, reused now that the N == 0 case has returned) | |
151 | // Is N <= -35000 (p8) | |
152 | // Raise Denormal operand flag with compare | |
153 | // Main path, create 2**N | |
154 | // | |
155 | { .mfi | |
156 | setf.exp FR_NBig = GR_Scratch1 | |
157 | nop.f 0 | |
158 | cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big | |
159 | } | |
160 | { .mfi | |
161 | setf.exp FR_Big = GR_Scratch | |
162 | fcmp.ge.s0 p0,p11 = FR_Floating_X,f0 | |
163 | cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig | |
164 | };; | |
165 | ||
166 | // | |
167 | // Adjust 2**N if N was very small or very large (use 2**-big or 2**big instead) | |
168 | // Also load the sign/exponent fields 0x3007F and 0x1007F used as overflow limits below | |
169 | { .mfi | |
170 | nop.m 0 | |
171 | (p6) fma.s1 FR_Two_N = FR_Big,f1,f0 | |
172 | nop.i 0 | |
173 | } | |
174 | { .mlx | |
175 | nop.m 999 | |
176 | (p0) movl GR_Scratch = 0x000000000003007F | |
177 | };; | |
178 | ||
179 | ||
180 | { .mfi | |
181 | nop.m 0 | |
182 | (p8) fma.s1 FR_Two_N = FR_NBig,f1,f0 | |
183 | nop.i 0 | |
184 | } | |
185 | { .mlx | |
186 | nop.m 999 | |
187 | (p0) movl GR_Scratch1= 0x000000000001007F | |
188 | };; | |
189 | ||
190 | // Set up necessary status fields | |
191 | // | |
192 | // S0 user supplied status | |
193 | // S2 user supplied status + WRE + TD (Overflows) | |
194 | // S3 user supplied status + FZ + TD (Underflows) | |
195 | // | |
196 | { .mfi | |
197 | nop.m 999 | |
198 | (p0) fsetc.s3 0x7F,0x41 | |
199 | nop.i 999 | |
200 | } | |
201 | { .mfi | |
202 | nop.m 999 | |
203 | (p0) fsetc.s2 0x7F,0x42 | |
204 | nop.i 999 | |
205 | };; | |
206 | ||
207 | // | |
208 | // Do final operation 2**N * normalized x three times: under s0, s3 and s2 | |
209 | // FR_NBig and FR_Big are reloaded here with the overflow limits | |
210 | { .mfi | |
211 | setf.exp FR_NBig = GR_Scratch | |
212 | fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 | |
213 | nop.i 999 | |
214 | } | |
215 | { .mfi | |
216 | nop.m 999 | |
217 | fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 | |
218 | nop.i 999 | |
219 | };; | |
220 | { .mfi | |
221 | setf.exp FR_Big = GR_Scratch1 | |
222 | fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 | |
223 | nop.i 999 | |
224 | };; | |
225 | ||
226 | // Check for overflow or underflow. | |
227 | // Restore s3 | |
228 | // Restore s2 | |
229 | // | |
230 | { .mfi | |
231 | nop.m 0 | |
232 | fsetc.s3 0x7F,0x40 | |
233 | nop.i 999 | |
234 | } | |
235 | { .mfi | |
236 | nop.m 0 | |
237 | fsetc.s2 0x7F,0x40 | |
238 | nop.i 999 | |
239 | };; | |
240 | ||
241 | // | |
242 | // Is the s3 (FZ) result zero? (p6) | |
243 | // Is the s2 (WRE) result >= FR_Big? (p7, else p8), GR_Tag = 178 is carried on the overflow branches | |
244 | { .mfi | |
245 | nop.m 999 | |
246 | fclass.m.unc p6, p0 = FR_Result3, 0x007 | |
247 | nop.i 999 | |
248 | } | |
249 | { .mfi | |
250 | addl GR_Tag = 178, r0 | |
251 | fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big | |
252 | nop.i 0 | |
253 | };; | |
254 | ||
255 | // | |
256 | // Detect masked underflow - Tiny + Inexact Only | |
257 | // Keep p6 only when the s0 result differs from the s2 result | |
258 | { .mfi | |
259 | nop.m 999 | |
260 | (p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 | |
261 | nop.i 999 | |
262 | };; | |
263 | ||
264 | // | |
265 | // Is result bigger than the allowed range on the negative side? (p9, tested only under p8) | |
266 | // Branch out for underflow with GR_Tag = 179 | |
267 | // | |
268 | { .mfb | |
269 | (p6) addl GR_Tag = 179, r0 | |
270 | (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig | |
271 | (p6) br.cond.spnt L(scalbnf_UNDERFLOW) | |
272 | };; | |
273 | ||
274 | // | |
275 | // Branch out for overflow (p7 positive limit, p9 negative limit) | |
276 | // | |
277 | { .mbb | |
278 | nop.m 0 | |
279 | (p7) br.cond.spnt L(scalbnf_OVERFLOW) | |
280 | (p9) br.cond.spnt L(scalbnf_OVERFLOW) | |
281 | };; | |
282 | ||
283 | // | |
284 | // Return from main path. | |
285 | // | |
286 | { .mfb | |
287 | nop.m 999 | |
288 | nop.f 0 | |
289 | br.ret.sptk b0;; | |
290 | } | |
291 | ||
292 | .endp scalbnf | |
293 | ASM_SIZE_DIRECTIVE(scalbnf) | |
294 | .proc __libm_error_region | |
295 | __libm_error_region: | |
296 | ||
297 | L(scalbnf_OVERFLOW): | |
298 | L(scalbnf_UNDERFLOW): | |
299 | ||
300 | // Shared overflow/underflow exit: spills N, x and the result, then calls __libm_error_support | |
301 | // Get stack address of N (old sp - 32, which is new sp + 32 after the frame is allocated) | |
302 | // | |
303 | .prologue | |
304 | { .mfi | |
305 | add GR_Parameter_Y=-32,sp | |
306 | nop.f 0 | |
307 | .save ar.pfs,GR_SAVE_PFS | |
308 | mov GR_SAVE_PFS=ar.pfs | |
309 | } | |
310 | // | |
311 | // Adjust sp (allocate a 64-byte frame) and save gp | |
312 | // | |
313 | { .mfi | |
314 | .fframe 64 | |
315 | add sp=-64,sp | |
316 | nop.f 0 | |
317 | mov GR_SAVE_GP=gp | |
318 | };; | |
319 | ||
320 | // | |
321 | // Store N on stack in correct position (sp + 32), post-increment moves GR_Parameter_Y to sp + 48 | |
322 | // Locate the address of x on stack (sp + 16), save b0 | |
323 | // | |
324 | { .mmi | |
325 | st8 [GR_Parameter_Y] = GR_N_as_int,16 | |
326 | add GR_Parameter_X = 16,sp | |
327 | .save b0, GR_SAVE_B0 | |
328 | mov GR_SAVE_B0=b0 | |
329 | };; | |
330 | ||
331 | // | |
332 | // Store the normalized x on the stack. | |
333 | // Get address for result on stack (sp + 48), store the result there, | |
334 | // then step GR_Parameter_Y back to the slot holding N (sp + 32) before the call | |
335 | .body | |
336 | { .mib | |
337 | stfs [GR_Parameter_X] = FR_Norm_X | |
338 | add GR_Parameter_RESULT = 0,GR_Parameter_Y | |
339 | nop.b 0 | |
340 | } | |
341 | { .mib | |
342 | stfs [GR_Parameter_Y] = FR_Result | |
343 | add GR_Parameter_Y = -16,GR_Parameter_Y | |
344 | br.call.sptk b0=__libm_error_support# | |
345 | };; | |
346 | ||
347 | // | |
348 | // Get location of result on stack (sp + 48) | |
349 | // | |
350 | { .mmi | |
351 | nop.m 0 | |
352 | nop.m 0 | |
353 | add GR_Parameter_RESULT = 48,sp | |
354 | };; | |
355 | ||
356 | // | |
357 | // Get the new result from its stack slot, release the 64-byte frame, restore b0 | |
358 | // | |
359 | { .mmi | |
360 | ldfs FR_Result = [GR_Parameter_RESULT] | |
361 | .restore sp | |
362 | add sp = 64,sp | |
363 | mov b0 = GR_SAVE_B0 | |
364 | };; | |
365 | ||
366 | // | |
367 | // Restore gp, ar.pfs and return | |
368 | // | |
369 | { .mib | |
370 | mov gp = GR_SAVE_GP | |
371 | mov ar.pfs = GR_SAVE_PFS | |
372 | br.ret.sptk b0 | |
373 | };; | |
374 | ||
375 | .endp __libm_error_region | |
376 | ASM_SIZE_DIRECTIVE(__libm_error_region) | |
377 | ||
378 | .type __libm_error_support#,@function | |
379 | .global __libm_error_support# |