]>
Commit | Line | Data |
---|---|---|
8da2915d UD |
1 | .file "roundf.s" |
2 | ||
0ecb606c JJ |
3 | |
4 | // Copyright (c) 2000 - 2003, Intel Corporation | |
8da2915d | 5 | // All rights reserved. |
0ecb606c JJ |
6 | // |
7 | // Contributed 2000 by the Intel Numerics Group, Intel Corporation | |
aeb25823 AJ |
8 | // |
9 | // Redistribution and use in source and binary forms, with or without | |
10 | // modification, are permitted provided that the following conditions are | |
11 | // met: | |
12 | // | |
13 | // * Redistributions of source code must retain the above copyright | |
14 | // notice, this list of conditions and the following disclaimer. | |
15 | // | |
16 | // * Redistributions in binary form must reproduce the above copyright | |
17 | // notice, this list of conditions and the following disclaimer in the | |
18 | // documentation and/or other materials provided with the distribution. | |
19 | // | |
20 | // * The name of Intel Corporation may not be used to endorse or promote | |
21 | // products derived from this software without specific prior written | |
22 | // permission. | |
0ecb606c JJ |
23 | |
24 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
25 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
8da2915d | 26 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
0ecb606c | 27 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS |
8da2915d | 28 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
0ecb606c JJ |
29 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
30 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
31 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
8da2915d | 32 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING |
0ecb606c JJ |
33 | // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
34 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
35 | // | |
8da2915d | 36 | // Intel Corporation is the author of this code, and requests that all |
0ecb606c JJ |
37 | // problem reports or change requests be submitted to it directly at |
38 | // http://www.intel.com/software/products/opensource/libraries/num.htm. | |
8da2915d UD |
39 | // |
40 | // History | |
41 | //============================================================== | |
0ecb606c JJ |
42 | // 10/25/00 Initial version |
43 | // 06/14/01 Changed cmp to an equivalent form | |
44 | // 05/20/02 Cleaned up namespace and sf0 syntax | |
45 | // 01/20/03 Improved performance and reduced code size | |
46 | // 04/18/03 Eliminate possible WAW dependency warning | |
47 | // 09/03/03 Improved performance | |
8da2915d | 48 | //============================================================== |
0ecb606c | 49 | |
8da2915d UD |
50 | // API |
51 | //============================================================== | |
52 | // float roundf(float x) | |
0ecb606c | 53 | //============================================================== |
8da2915d | 54 | |
0ecb606c JJ |
55 | // general registers used as integer scratch: |
56 | // r14 - r18 (sign/exponent of x, exponent mask, exponents of 2^23 and 0.5) | |
8da2915d | 57 | |
0ecb606c JJ |
58 | rSignexp = r14 |
59 | rExp = r15 | |
60 | rExpMask = r16 | |
61 | rBigexp = r17 | |
62 | rExpHalf = r18 | |
8da2915d | 63 | |
0ecb606c JJ |
64 | // floating-point registers used: |
65 | // f8 - f13 (f8 holds the argument x and the result; f9 - f13 are aliased below) | |
8da2915d | 66 | |
0ecb606c JJ |
67 | fXtruncInt = f9 |
68 | fNormX = f10 | |
69 | fHalf = f11 | |
70 | fInc = f12 | |
71 | fRem = f13 | |
72 | ||
73 | // predicate registers used: | |
74 | // p6 - p10 | |
8da2915d UD |
75 | |
76 | // Overview of operation | |
77 | //============================================================== | |
8da2915d | 78 | // float roundf(float x) |
0ecb606c JJ |
79 | // Return an integer value (represented as a float) that is x |
80 | // rounded to nearest integer, halfway cases rounded away from | |
81 | // zero. | |
8da2915d UD |
82 | // if x>0 result = trunc(x+0.5) |
83 | // if x<0 result = trunc(x-0.5) | |
0ecb606c JJ |
84 | // |
85 | //============================================================== | |
8da2915d | 86 | |
0ecb606c JJ |
87 | // double_extended |
88 | // if the exponent is >= 1003e => 3F(true) = 63(decimal) | |
89 | // we have a significand of 64 bits 1.63-bits. | |
90 | // If we multiply by 2^63, we no longer have a fractional part | |
91 | // So input is an integer value already. | |
8da2915d | 92 | |
0ecb606c JJ |
93 | // double |
94 | // if the exponent is >= 10033 => 34(true) = 52(decimal) | |
95 | // 34 + 3ff = 433 | |
96 | // we have a significand of 53 bits 1.52-bits. (implicit 1) | |
97 | // If we multiply by 2^52, we no longer have a fractional part | |
98 | // So input is an integer value already. | |
8da2915d | 99 | |
0ecb606c JJ |
100 | // single |
101 | // if the exponent is >= 10016 => 17(true) = 23(decimal) | |
102 | // we have a significand of 24 bits 1.23-bits. (implicit 1) | |
103 | // If we multiply by 2^23, we no longer have a fractional part | |
104 | // So input is an integer value already. | |
8da2915d | 105 | |
8da2915d UD |
106 | |
107 | .section .text | |
0ecb606c | 108 | GLOBAL_LIBM_ENTRY(roundf) |
8da2915d | 109 | |
8da2915d | 110 | { .mfi |
0ecb606c JJ |
111 | getf.exp rSignexp = f8 // Get sign+exponent of x; redone from fNormX at ROUND_UNORM if x unorm |
112 | fcvt.fx.trunc.s1 fXtruncInt = f8 // trunc(x) as integer in significand; redone at ROUND_UNORM | |
113 | addl rBigexp = 0x10016, r0 // Biased exponent of 2^23; at or above this x is already an integer | |
114 | } | |
8da2915d | 115 | { .mfi |
0ecb606c JJ |
116 | mov rExpHalf = 0x0FFFE // Form sign and exponent of 0.5 |
117 | fnorm.s1 fNormX = f8 // Normalized copy of x (used for exponent, sign and remainder) | |
118 | mov rExpMask = 0x1FFFF // Form exponent mask | |
8da2915d | 119 | } |
0ecb606c | 120 | ;; |
8da2915d | 121 | |
8da2915d | 122 | { .mfi |
0ecb606c JJ |
123 | setf.exp fHalf = rExpHalf // Form 0.5 |
124 | fclass.m p7,p0 = f8, 0x0b // p7 set if x unorm | |
125 | nop.i 0 | |
8da2915d | 126 | } |
0ecb606c JJ |
127 | ;; |
128 | ||
129 | { .mfb | |
130 | nop.m 0 | |
131 | fclass.m p6,p0 = f8, 0x1e3 // p6 set if x natval, nan, inf (tested in ROUND_COMMON) | |
132 | (p7) br.cond.spnt ROUND_UNORM // Branch if x unorm | |
8da2915d | 133 | } |
0ecb606c | 134 | ;; |
8da2915d | 135 | |
0ecb606c JJ |
136 | ROUND_COMMON: |
137 | // Return here from ROUND_UNORM | |
138 | { .mfb | |
139 | nop.m 0 | |
140 | fcmp.lt.s1 p8,p9 = f8, f0 // p8 if x < 0, else p9 (selects remainder form below) | |
141 | (p6) br.cond.spnt ROUND_SPECIAL // Exit if x natval, nan, inf | |
8da2915d | 142 | } |
0ecb606c JJ |
143 | ;; |
144 | ||
8da2915d | 145 | { .mfi |
0ecb606c JJ |
146 | nop.m 0 |
147 | fcvt.xf f8 = fXtruncInt // f8 = trunc(x) as float: pre-result if 0.5 <= |x| < 2^23 | |
148 | nop.i 0 | |
8da2915d | 149 | } |
0ecb606c | 150 | ;; |
8da2915d | 151 | |
8da2915d | 152 | { .mfi |
0ecb606c JJ |
153 | and rExp = rSignexp, rExpMask // Get biased exponent |
154 | fmerge.s fInc = fNormX, f1 // Increment applied if |rem| >= 0.5: 1.0 with the sign of x | |
155 | nop.i 0 | |
8da2915d | 156 | } |
0ecb606c | 157 | ;; |
8da2915d | 158 | |
0ecb606c JJ |
159 | { .mmi |
160 | cmp.lt p6,p0 = rExp, rExpHalf // p6: |x| < 0.5 (selects signed-zero result) | |
161 | cmp.ge p7,p0 = rExp, rBigexp // p7: |x| >= 2^23 (selects result x) | |
162 | cmp.lt p10,p0 = rExp, rExpHalf // p10: |x| < 0.5 (early-exit predicate; p7 path also sets it) | |
8da2915d | 163 | } |
0ecb606c JJ |
164 | ;; |
165 | ||
166 | // We must correct result if |x| < 0.5, or |x| >= 2^23 | |
167 | .pred.rel "mutex",p6,p7 | |
8da2915d | 168 | { .mfi |
0ecb606c JJ |
169 | nop.m 0 |
170 | (p6) fmerge.s f8 = fNormX, f0 // If |x| < 0.5, result sgn(x)*0 | |
171 | nop.i 0 | |
8da2915d | 172 | } |
0ecb606c JJ |
173 | { .mfb |
174 | (p7) cmp.eq p10,p0 = r0, r0 // Also turn on p10 if |x| >= 2^23 | |
175 | (p7) fma.s.s0 f8 = fNormX, f1, f0 // If |x| >= 2^23, result x | |
176 | (p10) br.ret.spnt b0 // Exit |x| < 0.5 or |x| >= 2^23 | |
177 | } | |
178 | ;; | |
8da2915d | 179 | |
0ecb606c | 180 | // Here if 0.5 <= |x| < 2^23 |
8da2915d | 181 | { .mfi |
0ecb606c JJ |
182 | nop.m 0 |
183 | (p9) fms.s1 fRem = fNormX, f1, f8 // x >= 0: remainder = x - trunc(x) | |
184 | nop.i 0 | |
8da2915d | 185 | } |
8da2915d | 186 | { .mfi |
0ecb606c JJ |
187 | nop.m 0 |
188 | (p8) fms.s1 fRem = f8, f1, fNormX // x < 0: remainder = trunc(x) - x, so fRem = |rem| | |
189 | nop.i 0 | |
8da2915d | 190 | } |
0ecb606c | 191 | ;; |
8da2915d | 192 | |
8da2915d | 193 | { .mfi |
0ecb606c JJ |
194 | nop.m 0 |
195 | fcmp.ge.s1 p9,p0 = fRem, fHalf // p9 (reused) set if |rem| >= 0.5 | |
196 | nop.i 0 | |
8da2915d | 197 | } |
0ecb606c JJ |
198 | ;; |
199 | ||
200 | // If x < 0 and |rem| >= 0.5, then subtract 1 from result (fInc = -1) |
201 | // If x > 0 and |rem| >= 0.5, then add 1 to result (fInc = +1) | |
202 | { .mfb | |
203 | nop.m 0 | |
204 | (p9) fma.s.s0 f8 = f8, f1, fInc | |
205 | br.ret.sptk b0 | |
8da2915d | 206 | } |
0ecb606c JJ |
207 | ;; |
208 | ||
209 | ||
210 | ROUND_SPECIAL: | |
211 | // Here if x natval, nan, inf | |
212 | { .mfb | |
213 | nop.m 0 | |
214 | fma.s.s0 f8 = f8, f1, f0 | |
215 | br.ret.sptk b0 | |
8da2915d | 216 | } |
0ecb606c | 217 | ;; |
8da2915d | 218 | |
0ecb606c JJ |
219 | ROUND_UNORM: |
220 | // Here if x unorm | |
8da2915d | 221 | { .mfi |
0ecb606c JJ |
222 | getf.exp rSignexp = fNormX // Recompute signexp from the normalized input |
223 | fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag; p7 is rewritten in ROUND_COMMON | |
224 | nop.i 0 | |
8da2915d | 225 | } |
8da2915d | 226 | { .mfb |
0ecb606c JJ |
227 | nop.m 0 |
228 | fcvt.fx.trunc.s1 fXtruncInt = fNormX // Recompute trunc(x) from the normalized input | |
229 | br.cond.sptk ROUND_COMMON // Return to main path | |
8da2915d | 230 | } |
0ecb606c | 231 | ;; |
8da2915d | 232 | |
0ecb606c | 233 | GLOBAL_LIBM_END(roundf) |