]>
Commit | Line | Data |
---|---|---|
8da2915d UD |
1 | .file "floor.s" |
2 | ||
0ecb606c JJ |
3 | |
4 | // Copyright (c) 2000 - 2003, Intel Corporation | |
8da2915d | 5 | // All rights reserved. |
0ecb606c JJ |
6 | // |
7 | // Contributed 2000 by the Intel Numerics Group, Intel Corporation | |
aeb25823 AJ |
8 | // |
9 | // Redistribution and use in source and binary forms, with or without | |
10 | // modification, are permitted provided that the following conditions are | |
11 | // met: | |
12 | // | |
13 | // * Redistributions of source code must retain the above copyright | |
14 | // notice, this list of conditions and the following disclaimer. | |
15 | // | |
16 | // * Redistributions in binary form must reproduce the above copyright | |
17 | // notice, this list of conditions and the following disclaimer in the | |
18 | // documentation and/or other materials provided with the distribution. | |
19 | // | |
20 | // * The name of Intel Corporation may not be used to endorse or promote | |
21 | // products derived from this software without specific prior written | |
22 | // permission. | |
0ecb606c JJ |
23 | |
24 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
25 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
8da2915d | 26 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
0ecb606c | 27 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS |
8da2915d | 28 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
0ecb606c JJ |
29 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
30 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
31 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
8da2915d | 32 | // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING |
0ecb606c JJ |
33 | // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
34 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
35 | // | |
8da2915d | 36 | // Intel Corporation is the author of this code, and requests that all |
0ecb606c JJ |
37 | // problem reports or change requests be submitted to it directly at |
38 | // http://www.intel.com/software/products/opensource/libraries/num.htm. | |
8da2915d | 39 | // |
8da2915d UD |
40 | // History |
41 | //============================================================== | |
0ecb606c JJ |
42 | // 02/02/00 Initial version |
43 | // 03/22/00 Updated to improve performance | |
44 | // 06/13/00 Improved speed, fixed setting of inexact flag | |
45 | // 06/27/00 Eliminated incorrect invalid flag setting | |
46 | // 02/07/01 Corrected sign of zero result in round to -inf mode | |
47 | // 05/20/02 Cleaned up namespace and sf0 syntax | |
48 | // 01/28/03 Improved performance | |
49 | //============================================================== | |
8da2915d UD |
50 | |
51 | // API | |
52 | //============================================================== | |
53 | // double floor(double x) | |
0ecb606c | 54 | //============================================================== |
8da2915d | 55 | |
0ecb606c JJ |
56 | // general input registers: |
57 | // r14 - r18 | |
8da2915d | 58 | |
0ecb606c JJ |
59 | rSignexp = r14 | // sign+exponent field of x, read with getf.exp
60 | rExp = r15 | | // biased exponent: rSignexp masked with rExpMask
61 | rExpMask = r16 | | // exponent mask, loaded with 0x1FFFF
62 | rBigexp = r17 | | // threshold exponent 0x10033, compared against rExp
63 | rM1 = r18 | | // -1 (all ones), moved into fTmp significand
8da2915d | 64 | |
0ecb606c JJ |
65 | // floating-point registers: |
66 | // f8 - f13 | |
8da2915d | 67 | |
0ecb606c JJ |
68 | fXInt = f9 | // x converted to integer with truncation (fcvt.fx.trunc)
69 | fNormX = f10 | | // normalized copy of x (fnorm of f8)
70 | fTmp = f11 | | // all-ones significand, used by the dummy op that sets inexact
71 | fAdj = f12 | | // adjustment added to trunc(x): -1 if x < 0, else 0
72 | fPreResult = f13 | | // trunc(x) converted back to floating point (fcvt.xf)
8da2915d | 73 | |
0ecb606c JJ |
74 | // predicate registers used: |
75 | // p6 - p9 | |
8da2915d UD |
76 | |
77 | // Overview of operation | |
78 | //============================================================== | |
8da2915d | 79 | // double floor(double x) |
0ecb606c | 80 | // Return an integer value (represented as a double) that is the largest |
8da2915d UD |
81 | // value not greater than x |
82 | // This is x rounded toward -infinity to an integral value. | |
83 | // Inexact is set if x != floor(x) | |
0ecb606c | 84 | //============================================================== |
8da2915d UD |
85 | |
86 | // double_extended | |
87 | // if the exponent is >= 1003e => 3F(true) = 63(decimal) | |
88 | // we have a significand of 64 bits 1.63-bits. | |
89 | // If we multiply by 2^63, we no longer have a fractional part | |
90 | // So input is an integer value already. | |
91 | ||
92 | // double | |
93 | // if the exponent is >= 10033 => 34(true) = 52(decimal) | |
94 | // 34 + 3ff = 433 | |
95 | // we have a significand of 53 bits 1.52-bits. (implicit 1) | |
96 | // If we multiply by 2^52, we no longer have a fractional part | |
97 | // So input is an integer value already. | |
98 | ||
99 | // single | |
100 | // if the exponent is >= 10016 => 17(true) = 23(decimal) | |
101 | // we have a significand of 24 bits 1.23-bits. (implicit 1) | |
102 | // If we multiply by 2^23, we no longer have a fractional part | |
103 | // So input is an integer value already. | |
104 | ||
8da2915d | 105 | |
0ecb606c JJ |
106 | .section .text |
107 | GLOBAL_IEEE754_ENTRY(floor) | |
// floor: input x is read from f8 and the result is written back to f8.
// Three result paths are visible below:
//   - x natval, nan, inf or 0: f8 is passed through fma.d.s0 with f1 and f0
//     and the routine returns early.
//   - biased exponent >= rBigexp (|x| >= 2^52): result is normalized x.
//   - otherwise: result is trunc(x) + fAdj (fAdj = -1 when x < 0, else 0);
//     if trunc(x) equals x the result is replaced by normalized x, and if
//     not, a dummy multiply in .s0 is issued to set inexact.
// Unnormal inputs detour through FLOOR_UNORM, which re-reads the exponent
// from the normalized value and then rejoins at FLOOR_COMMON.
8da2915d UD |
108 | |
109 | { .mfi | |
0ecb606c JJ |
110 | getf.exp rSignexp = f8 // Get sign+exponent of x; redone in FLOOR_UNORM if x unorm |
111 | fclass.m p7,p0 = f8, 0x0b // Test x unorm | |
112 | addl rBigexp = 0x10033, r0 // Biased exponent of 2^52; at or above it x is an integer | |
8da2915d UD |
113 | } |
114 | { .mfi | |
0ecb606c JJ |
115 | mov rM1 = -1 // Set all ones (later moved into fTmp significand) |
116 | fcvt.fx.trunc.s1 fXInt = f8 // Convert to int in significand, truncating | |
117 | mov rExpMask = 0x1FFFF // Form exponent mask | |
8da2915d | 118 | } |
0ecb606c | 119 | ;; |
8da2915d | 120 | |
8da2915d | 121 | { .mfi |
0ecb606c JJ |
122 | nop.m 0 |
123 | fcmp.lt.s1 p8,p9 = f8, f0 // Test x < 0 | |
124 | nop.i 0 | |
8da2915d | 125 | } |
0ecb606c JJ |
126 | { .mfb |
127 | setf.sig fTmp = rM1 // Make const for the dummy multiply that sets inexact | |
128 | fnorm.s1 fNormX = f8 // Normalize input | |
129 | (p7) br.cond.spnt FLOOR_UNORM // Branch if x unorm | |
8da2915d | 130 | } |
0ecb606c | 131 | ;; |
8da2915d | 132 | |
0ecb606c JJ |
133 | FLOOR_COMMON: |
134 | // Return here from FLOOR_UNORM | |
135 | { .mfi | |
136 | nop.m 0 | |
137 | fclass.m p6,p0 = f8, 0x1e7 // Test x natval, nan, inf, 0 | |
138 | nop.i 0 | |
8da2915d | 139 | } |
0ecb606c | 140 | ;; |
8da2915d | 141 | |
8da2915d | 142 | .pred.rel "mutex",p8,p9 |
8da2915d | 143 | { .mfi |
0ecb606c JJ |
144 | nop.m 0 |
145 | (p8) fnma.s1 fAdj = f1, f1, f0 // If x < 0, adjustment is -1 | |
146 | nop.i 0 | |
8da2915d UD |
147 | } |
148 | { .mfi | |
0ecb606c JJ |
149 | nop.m 0 |
150 | (p9) fma.s1 fAdj = f0, f0, f0 // If not x < 0, adjustment is 0 | |
151 | nop.i 0 | |
8da2915d | 152 | } |
0ecb606c | 153 | ;; |
8da2915d UD |
154 | |
155 | { .mfi | |
0ecb606c JJ |
156 | nop.m 0 |
157 | fcvt.xf fPreResult = fXInt // trunc(x) | |
158 | nop.i 0 | |
8da2915d | 159 | } |
0ecb606c JJ |
160 | { .mfb |
161 | nop.m 0 | |
162 | (p6) fma.d.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf, 0 | |
163 | (p6) br.ret.spnt b0 // Exit if x natval, nan, inf, 0 | |
8da2915d | 164 | } |
0ecb606c | 165 | ;; |
8da2915d | 166 | |
0ecb606c JJ |
167 | { .mmi |
168 | and rExp = rSignexp, rExpMask // Get biased exponent | |
169 | ;; | |
170 | cmp.ge p7,p6 = rExp, rBigexp // Is |x| >= 2^52? | |
171 | nop.i 0 | |
8da2915d | 172 | } |
0ecb606c | 173 | ;; |
8da2915d UD |
174 | |
175 | { .mfi | |
0ecb606c JJ |
176 | nop.m 0 |
177 | (p6) fma.d.s0 f8 = fPreResult, f1, fAdj // trunc(x) + adj: result if !int, |x| < 2^52 | |
178 | nop.i 0 | |
8da2915d | 179 | } |
8da2915d | 180 | { .mfi |
0ecb606c JJ |
181 | nop.m 0 |
182 | (p7) fma.d.s0 f8 = fNormX, f1, f0 // Result, if |x| >= 2^52 | |
183 | nop.i 0 | |
8da2915d | 184 | } |
0ecb606c | 185 | ;; |
8da2915d UD |
186 | |
187 | { .mfi | |
0ecb606c JJ |
188 | nop.m 0 |
189 | (p6) fcmp.eq.unc.s1 p8, p9 = fPreResult, fNormX // Is trunc(x) = x ? (p8 = yes, p9 = no) | |
190 | nop.i 0 | |
8da2915d | 191 | } |
0ecb606c | 192 | ;; |
8da2915d | 193 | |
8da2915d | 194 | { .mfi |
0ecb606c JJ |
195 | nop.m 0 |
196 | (p9) fmpy.s0 fTmp = fTmp, fTmp // Dummy to set inexact when trunc(x) != x | |
197 | nop.i 0 | |
8da2915d | 198 | } |
8da2915d | 199 | { .mfb |
0ecb606c JJ |
200 | nop.m 0 |
201 | (p8) fma.d.s0 f8 = fNormX, f1, f0 // If x int, result normalized x | |
202 | br.ret.sptk b0 // Exit main path (unpredicated; also reached when |x| >= 2^52) | |
8da2915d | 203 | } |
0ecb606c JJ |
204 | ;; |
205 | ||
8da2915d | 206 | |
0ecb606c JJ |
207 | FLOOR_UNORM: |
208 | // Here if x unorm | |
8da2915d | 209 | { .mfb |
0ecb606c JJ |
210 | getf.exp rSignexp = fNormX // Recompute signexp from the normalized input |
211 | fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag; p7 is rewritten before its next use | |
212 | br.cond.sptk FLOOR_COMMON // Return to main path | |
8da2915d | 213 | } |
0ecb606c | 214 | ;; |
8da2915d | 215 | |
0ecb606c | 216 | GLOBAL_IEEE754_END(floor) |