]>
Commit | Line | Data |
---|---|---|
748086b7 | 1 | /* Copyright (C) 2007, 2009 Free Software Foundation, Inc. |
200359e8 L |
2 | |
3 | This file is part of GCC. | |
4 | ||
5 | GCC is free software; you can redistribute it and/or modify it under | |
6 | the terms of the GNU General Public License as published by the Free | |
748086b7 | 7 | Software Foundation; either version 3, or (at your option) any later |
200359e8 L |
8 | version. |
9 | ||
200359e8 L |
10 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
13 | for more details. | |
14 | ||
748086b7 JJ |
15 | Under Section 7 of GPL version 3, you are granted additional |
16 | permissions described in the GCC Runtime Library Exception, version | |
17 | 3.1, as published by the Free Software Foundation. | |
18 | ||
19 | You should have received a copy of the GNU General Public License and | |
20 | a copy of the GCC Runtime Library Exception along with this program; | |
21 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 | <http://www.gnu.org/licenses/>. */ | |
200359e8 L |
23 | |
24 | #ifndef _DIV_MACROS_H_ | |
25 | #define _DIV_MACROS_H_ | |
26 | ||
27 | #include "bid_internal.h" | |
28 | ||
29 | #define FENCE __fence | |
30 | //#define FENCE | |
31 | ||
32 | //#define DOUBLE_EXTENDED_ON | |
33 | ||
34 | #if DOUBLE_EXTENDED_ON | |
35 | ||
36 | ||
37 | __BID_INLINE__ void | |
38 | __div_128_by_128 (UINT128 * pCQ, UINT128 * pCR, UINT128 CX, UINT128 CY) { | |
39 | UINT128 CB, CB2, CB4, CB8, CQB, CA; | |
40 | int_double d64, dm64, ds; | |
41 | int_float t64; | |
42 | double dx, dq, dqh; | |
b2a00c89 | 43 | BINARY80 lq, lx, ly; |
200359e8 L |
44 | UINT64 Rh, R, B2, B4, Ph, Ql, Ql2, carry, Qh; |
45 | ||
46 | if (!CY.w[1]) { | |
47 | pCR->w[1] = 0; | |
48 | ||
49 | if (!CX.w[1]) { | |
50 | pCQ->w[0] = CX.w[0] / CY.w[0]; | |
51 | pCQ->w[1] = 0; | |
52 | pCR->w[1] = 0; | |
53 | pCR->w[0] = CX.w[0] - pCQ->w[0] * CY.w[0]; | |
54 | } else { | |
55 | ||
56 | // This path works for CX<2^116 only | |
57 | ||
58 | // 2^64 | |
59 | d64.i = 0x43f0000000000000; | |
60 | // 2^64 | |
61 | dm64.i = 0x3bf0000000000000; | |
62 | // 1.5*2^(-52) | |
63 | ds.i = 0x3cb8000000000000; | |
b2a00c89 L |
64 | dx = (BINARY80) CX.w[1] * d64.d + (BINARY80) CX.w[0]; |
65 | dq = dx / (BINARY80) CY.w[0]; | |
200359e8 L |
66 | dq -= dq * (ds.d); |
67 | dqh = dq * dm64.d; | |
68 | Qh = (UINT64) dqh; | |
69 | Ql = (UINT64) (dq - ((double) Qh) * d64.d); | |
70 | ||
71 | Rh = CX.w[0] - Ql * CY.w[0]; | |
72 | Ql2 = Rh / CY.w[0]; | |
73 | pCR->w[0] = Rh - Ql2 * CY.w[0]; | |
74 | __add_carry_out ((pCQ->w[0]), carry, Ql, Ql2); | |
75 | pCQ->w[1] = Qh + carry; | |
76 | ||
77 | } | |
78 | return; | |
79 | } | |
80 | // now CY.w[1] > 0 | |
81 | ||
82 | // 2^64 | |
83 | t64.i = 0x5f800000; | |
b2a00c89 L |
84 | lx = (BINARY80) CX.w[1] * (BINARY80) t64.d + (BINARY80) CX.w[0]; |
85 | ly = (BINARY80) CY.w[1] * (BINARY80) t64.d + (BINARY80) CY.w[0]; | |
200359e8 L |
86 | lq = lx / ly; |
87 | pCQ->w[0] = (UINT64) lq; | |
88 | ||
89 | pCQ->w[1] = 0; | |
90 | ||
91 | if (!pCQ->w[0]) { | |
92 | /*if(__unsigned_compare_ge_128(CX,CY)) | |
93 | { | |
94 | pCQ->w[0] = 1; | |
95 | __sub_128_128((*pCR), CX, CY); | |
96 | } | |
97 | else */ | |
98 | { | |
99 | pCR->w[1] = CX.w[1]; | |
100 | pCR->w[0] = CX.w[0]; | |
101 | } | |
102 | return; | |
103 | } | |
104 | ||
105 | if (CY.w[1] >= 16 || pCQ->w[0] <= 0x1000000000000000ull) { | |
106 | pCQ->w[0] = (UINT64) lq - 1; | |
107 | __mul_64x128_full (Ph, CQB, (pCQ->w[0]), CY); | |
108 | __sub_128_128 (CA, CX, CQB); | |
109 | if (__unsigned_compare_ge_128 (CA, CY)) { | |
110 | __sub_128_128 (CA, CA, CY); | |
111 | pCQ->w[0]++; | |
112 | if (__unsigned_compare_ge_128 (CA, CY)) { | |
113 | __sub_128_128 (CA, CA, CY); | |
114 | pCQ->w[0]++; | |
115 | } | |
116 | } | |
117 | pCR->w[1] = CA.w[1]; | |
118 | pCR->w[0] = CA.w[0]; | |
119 | } else { | |
120 | pCQ->w[0] = (UINT64) lq - 6; | |
121 | ||
122 | __mul_64x128_full (Ph, CQB, (pCQ->w[0]), CY); | |
123 | __sub_128_128 (CA, CX, CQB); | |
124 | ||
125 | CB8.w[1] = (CY.w[1] << 3) | (CY.w[0] >> 61); | |
126 | CB8.w[0] = CY.w[0] << 3; | |
127 | CB4.w[1] = (CY.w[1] << 2) | (CY.w[0] >> 62); | |
128 | CB4.w[0] = CY.w[0] << 2; | |
129 | CB2.w[1] = (CY.w[1] << 1) | (CY.w[0] >> 63); | |
130 | CB2.w[0] = CY.w[0] << 1; | |
131 | ||
132 | if (__unsigned_compare_ge_128 (CA, CB8)) { | |
133 | pCQ->w[0] += 8; | |
134 | __sub_128_128 (CA, CA, CB8); | |
135 | } | |
136 | if (__unsigned_compare_ge_128 (CA, CB4)) { | |
137 | pCQ->w[0] += 4; | |
138 | __sub_128_128 (CA, CA, CB4); | |
139 | } | |
140 | if (__unsigned_compare_ge_128 (CA, CB2)) { | |
141 | pCQ->w[0] += 2; | |
142 | __sub_128_128 (CA, CA, CB2); | |
143 | } | |
144 | if (__unsigned_compare_ge_128 (CA, CY)) { | |
145 | pCQ->w[0] += 1; | |
146 | __sub_128_128 (CA, CA, CY); | |
147 | } | |
148 | ||
149 | pCR->w[1] = CA.w[1]; | |
150 | pCR->w[0] = CA.w[0]; | |
151 | } | |
152 | } | |
153 | ||
154 | ||
155 | ||
156 | ||
157 | ||
158 | ||
159 | __BID_INLINE__ void | |
160 | __div_256_by_128 (UINT128 * pCQ, UINT256 * pCA4, UINT128 CY) { | |
161 | UINT256 CQ2Y; | |
162 | UINT128 CQ2, CQ3Y; | |
163 | UINT64 Q3, carry64; | |
164 | int_double d64; | |
b2a00c89 | 165 | BINARY80 lx, ly, lq, l64, l128; |
200359e8 L |
166 | |
167 | // 2^64 | |
168 | d64.i = 0x43f0000000000000ull; | |
b2a00c89 | 169 | l64 = (BINARY80) d64.d; |
200359e8 L |
170 | // 2^128 |
171 | l128 = l64 * l64; | |
172 | ||
173 | lx = | |
b2a00c89 L |
174 | ((BINARY80) (*pCA4).w[3] * l64 + |
175 | (BINARY80) (*pCA4).w[2]) * l128 + | |
176 | (BINARY80) (*pCA4).w[1] * l64 + (BINARY80) (*pCA4).w[0]; | |
177 | ly = (BINARY80) CY.w[1] * l128 + (BINARY80) CY.w[0] * l64; | |
200359e8 L |
178 | |
179 | lq = lx / ly; | |
180 | CQ2.w[1] = (UINT64) lq; | |
181 | lq = (lq - CQ2.w[1]) * l64; | |
182 | CQ2.w[0] = (UINT64) lq; | |
183 | ||
184 | // CQ2*CY | |
185 | __mul_128x128_to_256 (CQ2Y, CY, CQ2); | |
186 | ||
187 | // CQ2Y <= (*pCA4) ? | |
188 | if (CQ2Y.w[3] < (*pCA4).w[3] | |
189 | || (CQ2Y.w[3] == (*pCA4).w[3] | |
190 | && (CQ2Y.w[2] < (*pCA4).w[2] | |
191 | || (CQ2Y.w[2] == (*pCA4).w[2] | |
192 | && (CQ2Y.w[1] < (*pCA4).w[1] | |
193 | || (CQ2Y.w[1] == (*pCA4).w[1] | |
194 | && (CQ2Y.w[0] <= (*pCA4).w[0]))))))) { | |
195 | ||
196 | // (*pCA4) -CQ2Y, guaranteed below 5*2^49*CY < 5*2^(49+128) | |
197 | __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CQ2Y.w[0]); | |
198 | __sub_borrow_in_out ((*pCA4).w[1], carry64, (*pCA4).w[1], CQ2Y.w[1], | |
199 | carry64); | |
200 | (*pCA4).w[2] = (*pCA4).w[2] - CQ2Y.w[2] - carry64; | |
201 | ||
b2a00c89 L |
202 | lx = ((BINARY80) (*pCA4).w[2] * l128 + |
203 | ((BINARY80) (*pCA4).w[1] * l64 + | |
204 | (BINARY80) (*pCA4).w[0])) * l64; | |
200359e8 L |
205 | lq = lx / ly; |
206 | Q3 = (UINT64) lq; | |
207 | ||
208 | if (Q3) { | |
209 | Q3--; | |
210 | __mul_64x128_short (CQ3Y, Q3, CY); | |
211 | __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CQ3Y.w[0]); | |
212 | (*pCA4).w[1] = (*pCA4).w[1] - CQ3Y.w[1] - carry64; | |
213 | ||
214 | if ((*pCA4).w[1] > CY.w[1] | |
215 | || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) { | |
216 | Q3++; | |
217 | __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]); | |
218 | (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64; | |
219 | if ((*pCA4).w[1] > CY.w[1] | |
220 | || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) { | |
221 | Q3++; | |
222 | __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], | |
223 | CY.w[0]); | |
224 | (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64; | |
225 | } | |
226 | } | |
227 | // add Q3 to Q2 | |
228 | __add_carry_out (CQ2.w[0], carry64, Q3, CQ2.w[0]); | |
229 | CQ2.w[1] += carry64; | |
230 | } | |
231 | } else { | |
232 | // CQ2Y - (*pCA4), guaranteed below 5*2^(49+128) | |
233 | __sub_borrow_out ((*pCA4).w[0], carry64, CQ2Y.w[0], (*pCA4).w[0]); | |
234 | __sub_borrow_in_out ((*pCA4).w[1], carry64, CQ2Y.w[1], (*pCA4).w[1], | |
235 | carry64); | |
236 | (*pCA4).w[2] = CQ2Y.w[2] - (*pCA4).w[2] - carry64; | |
237 | ||
238 | lx = | |
b2a00c89 L |
239 | ((BINARY80) (*pCA4).w[2] * l128 + |
240 | (BINARY80) (*pCA4).w[1] * l64 + (BINARY80) (*pCA4).w[0]) * l64; | |
200359e8 L |
241 | lq = lx / ly; |
242 | Q3 = 1 + (UINT64) lq; | |
243 | ||
244 | __mul_64x128_short (CQ3Y, Q3, CY); | |
245 | __sub_borrow_out ((*pCA4).w[0], carry64, CQ3Y.w[0], (*pCA4).w[0]); | |
246 | (*pCA4).w[1] = CQ3Y.w[1] - (*pCA4).w[1] - carry64; | |
247 | ||
248 | if ((SINT64) (*pCA4).w[1] > (SINT64) CY.w[1] | |
249 | || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) { | |
250 | Q3--; | |
251 | __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]); | |
252 | (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64; | |
253 | } else if ((SINT64) (*pCA4).w[1] < 0) { | |
254 | Q3++; | |
255 | __add_carry_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]); | |
256 | (*pCA4).w[1] = (*pCA4).w[1] + CY.w[1] + carry64; | |
257 | } | |
258 | // subtract Q3 from Q2 | |
259 | __sub_borrow_out (CQ2.w[0], carry64, CQ2.w[0], Q3); | |
260 | CQ2.w[1] -= carry64; | |
261 | } | |
262 | ||
263 | // (*pCQ) + CQ2 + carry | |
264 | __add_carry_out ((*pCQ).w[0], carry64, CQ2.w[0], (*pCQ).w[0]); | |
265 | (*pCQ).w[1] = (*pCQ).w[1] + CQ2.w[1] + carry64; | |
266 | ||
267 | ||
268 | } | |
269 | #else | |
270 | ||
271 | __BID_INLINE__ void | |
272 | __div_128_by_128 (UINT128 * pCQ, UINT128 * pCR, UINT128 CX0, UINT128 CY) { | |
273 | UINT128 CY36, CY51, CQ, A2, CX, CQT; | |
274 | UINT64 Q; | |
275 | int_double t64, d49, d60; | |
276 | double lx, ly, lq; | |
277 | ||
278 | if (!CX0.w[1] && !CY.w[1]) { | |
279 | pCQ->w[0] = CX0.w[0] / CY.w[0]; | |
280 | pCQ->w[1] = 0; | |
281 | pCR->w[1] = pCR->w[0] = 0; | |
282 | pCR->w[0] = CX0.w[0] - pCQ->w[0] * CY.w[0]; | |
283 | return; | |
284 | } | |
285 | ||
286 | CX.w[1] = CX0.w[1]; | |
287 | CX.w[0] = CX0.w[0]; | |
288 | ||
289 | // 2^64 | |
290 | t64.i = 0x43f0000000000000ull; | |
291 | lx = (double) CX.w[1] * t64.d + (double) CX.w[0]; | |
292 | ly = (double) CY.w[1] * t64.d + (double) CY.w[0]; | |
293 | lq = lx / ly; | |
294 | ||
295 | CY36.w[1] = CY.w[0] >> (64 - 36); | |
296 | CY36.w[0] = CY.w[0] << 36; | |
297 | ||
298 | CQ.w[1] = CQ.w[0] = 0; | |
299 | ||
300 | // Q >= 2^100 ? | |
301 | if (!CY.w[1] && !CY36.w[1] && (CX.w[1] >= CY36.w[0])) { | |
302 | // then Q >= 2^100 | |
303 | ||
304 | // 2^(-60)*CX/CY | |
305 | d60.i = 0x3c30000000000000ull; | |
306 | lq *= d60.d; | |
b2a00c89 | 307 | Q = (UINT64) lq - 4ull; |
200359e8 L |
308 | |
309 | // Q*CY | |
310 | __mul_64x64_to_128 (A2, Q, CY.w[0]); | |
311 | ||
312 | // A2 <<= 60 | |
313 | A2.w[1] = (A2.w[1] << 60) | (A2.w[0] >> (64 - 60)); | |
314 | A2.w[0] <<= 60; | |
315 | ||
316 | __sub_128_128 (CX, CX, A2); | |
317 | ||
318 | lx = (double) CX.w[1] * t64.d + (double) CX.w[0]; | |
319 | lq = lx / ly; | |
320 | ||
321 | CQ.w[1] = Q >> (64 - 60); | |
322 | CQ.w[0] = Q << 60; | |
323 | } | |
324 | ||
325 | ||
326 | CY51.w[1] = (CY.w[1] << 51) | (CY.w[0] >> (64 - 51)); | |
327 | CY51.w[0] = CY.w[0] << 51; | |
328 | ||
329 | if (CY.w[1] < (UINT64) (1 << (64 - 51)) | |
330 | && (__unsigned_compare_gt_128 (CX, CY51))) { | |
331 | // Q > 2^51 | |
332 | ||
333 | // 2^(-49)*CX/CY | |
334 | d49.i = 0x3ce0000000000000ull; | |
335 | lq *= d49.d; | |
336 | ||
b2a00c89 | 337 | Q = (UINT64) lq - 1ull; |
200359e8 L |
338 | |
339 | // Q*CY | |
340 | __mul_64x64_to_128 (A2, Q, CY.w[0]); | |
341 | A2.w[1] += Q * CY.w[1]; | |
342 | ||
343 | // A2 <<= 49 | |
344 | A2.w[1] = (A2.w[1] << 49) | (A2.w[0] >> (64 - 49)); | |
345 | A2.w[0] <<= 49; | |
346 | ||
347 | __sub_128_128 (CX, CX, A2); | |
348 | ||
349 | CQT.w[1] = Q >> (64 - 49); | |
350 | CQT.w[0] = Q << 49; | |
351 | __add_128_128 (CQ, CQ, CQT); | |
352 | ||
353 | lx = (double) CX.w[1] * t64.d + (double) CX.w[0]; | |
354 | lq = lx / ly; | |
355 | } | |
356 | ||
357 | Q = (UINT64) lq; | |
358 | ||
359 | __mul_64x64_to_128 (A2, Q, CY.w[0]); | |
360 | A2.w[1] += Q * CY.w[1]; | |
361 | ||
362 | __sub_128_128 (CX, CX, A2); | |
363 | if ((SINT64) CX.w[1] < 0) { | |
364 | Q--; | |
365 | CX.w[0] += CY.w[0]; | |
366 | if (CX.w[0] < CY.w[0]) | |
367 | CX.w[1]++; | |
368 | CX.w[1] += CY.w[1]; | |
369 | if ((SINT64) CX.w[1] < 0) { | |
370 | Q--; | |
371 | CX.w[0] += CY.w[0]; | |
372 | if (CX.w[0] < CY.w[0]) | |
373 | CX.w[1]++; | |
374 | CX.w[1] += CY.w[1]; | |
375 | } | |
376 | } else if (__unsigned_compare_ge_128 (CX, CY)) { | |
377 | Q++; | |
378 | __sub_128_128 (CX, CX, CY); | |
379 | } | |
380 | ||
381 | __add_128_64 (CQ, CQ, Q); | |
382 | ||
383 | ||
384 | pCQ->w[1] = CQ.w[1]; | |
385 | pCQ->w[0] = CQ.w[0]; | |
386 | pCR->w[1] = CX.w[1]; | |
387 | pCR->w[0] = CX.w[0]; | |
388 | return; | |
389 | } | |
390 | ||
391 | ||
392 | __BID_INLINE__ void | |
393 | __div_256_by_128 (UINT128 * pCQ, UINT256 * pCA4, UINT128 CY) { | |
394 | UINT256 CA4, CA2, CY51, CY36; | |
395 | UINT128 CQ, A2, A2h, CQT; | |
396 | UINT64 Q, carry64; | |
397 | int_double t64, d49, d60; | |
398 | double lx, ly, lq, d128, d192; | |
399 | ||
400 | // the quotient is assumed to be at most 113 bits, | |
401 | // as needed by BID128 divide routines | |
402 | ||
403 | // initial dividend | |
404 | CA4.w[3] = (*pCA4).w[3]; | |
405 | CA4.w[2] = (*pCA4).w[2]; | |
406 | CA4.w[1] = (*pCA4).w[1]; | |
407 | CA4.w[0] = (*pCA4).w[0]; | |
408 | CQ.w[1] = (*pCQ).w[1]; | |
409 | CQ.w[0] = (*pCQ).w[0]; | |
410 | ||
411 | // 2^64 | |
412 | t64.i = 0x43f0000000000000ull; | |
413 | d128 = t64.d * t64.d; | |
414 | d192 = d128 * t64.d; | |
415 | lx = (double) CA4.w[3] * d192 + ((double) CA4.w[2] * d128 + | |
416 | ((double) CA4.w[1] * t64.d + | |
417 | (double) CA4.w[0])); | |
418 | ly = (double) CY.w[1] * t64.d + (double) CY.w[0]; | |
419 | lq = lx / ly; | |
420 | ||
421 | CY36.w[2] = CY.w[1] >> (64 - 36); | |
422 | CY36.w[1] = (CY.w[1] << 36) | (CY.w[0] >> (64 - 36)); | |
423 | CY36.w[0] = CY.w[0] << 36; | |
424 | ||
425 | CQ.w[1] = (*pCQ).w[1]; | |
426 | CQ.w[0] = (*pCQ).w[0]; | |
427 | ||
428 | // Q >= 2^100 ? | |
429 | if (CA4.w[3] > CY36.w[2] | |
430 | || (CA4.w[3] == CY36.w[2] | |
431 | && (CA4.w[2] > CY36.w[1] | |
432 | || (CA4.w[2] == CY36.w[1] && CA4.w[1] >= CY36.w[0])))) { | |
433 | // 2^(-60)*CA4/CY | |
434 | d60.i = 0x3c30000000000000ull; | |
435 | lq *= d60.d; | |
b2a00c89 | 436 | Q = (UINT64) lq - 4ull; |
200359e8 L |
437 | |
438 | // Q*CY | |
439 | __mul_64x128_to_192 (CA2, Q, CY); | |
440 | ||
441 | // CA2 <<= 60 | |
442 | // CA2.w[3] = CA2.w[2] >> (64-60); | |
443 | CA2.w[2] = (CA2.w[2] << 60) | (CA2.w[1] >> (64 - 60)); | |
444 | CA2.w[1] = (CA2.w[1] << 60) | (CA2.w[0] >> (64 - 60)); | |
445 | CA2.w[0] <<= 60; | |
446 | ||
447 | // CA4 -= CA2 | |
448 | __sub_borrow_out (CA4.w[0], carry64, CA4.w[0], CA2.w[0]); | |
449 | __sub_borrow_in_out (CA4.w[1], carry64, CA4.w[1], CA2.w[1], | |
450 | carry64); | |
451 | CA4.w[2] = CA4.w[2] - CA2.w[2] - carry64; | |
452 | ||
453 | lx = ((double) CA4.w[2] * d128 + | |
454 | ((double) CA4.w[1] * t64.d + (double) CA4.w[0])); | |
455 | lq = lx / ly; | |
456 | ||
457 | CQT.w[1] = Q >> (64 - 60); | |
458 | CQT.w[0] = Q << 60; | |
459 | __add_128_128 (CQ, CQ, CQT); | |
460 | } | |
461 | ||
462 | CY51.w[2] = CY.w[1] >> (64 - 51); | |
463 | CY51.w[1] = (CY.w[1] << 51) | (CY.w[0] >> (64 - 51)); | |
464 | CY51.w[0] = CY.w[0] << 51; | |
465 | ||
b2a00c89 L |
466 | if (CA4.w[2] > CY51.w[2] || ((CA4.w[2] == CY51.w[2]) |
467 | && | |
468 | (__unsigned_compare_gt_128 (CA4, CY51)))) | |
469 | { | |
200359e8 L |
470 | // Q > 2^51 |
471 | ||
472 | // 2^(-49)*CA4/CY | |
473 | d49.i = 0x3ce0000000000000ull; | |
474 | lq *= d49.d; | |
475 | ||
b2a00c89 | 476 | Q = (UINT64) lq - 1ull; |
200359e8 L |
477 | |
478 | // Q*CY | |
479 | __mul_64x64_to_128 (A2, Q, CY.w[0]); | |
480 | __mul_64x64_to_128 (A2h, Q, CY.w[1]); | |
481 | A2.w[1] += A2h.w[0]; | |
482 | if (A2.w[1] < A2h.w[0]) | |
483 | A2h.w[1]++; | |
484 | ||
485 | // A2 <<= 49 | |
486 | CA2.w[2] = (A2h.w[1] << 49) | (A2.w[1] >> (64 - 49)); | |
487 | CA2.w[1] = (A2.w[1] << 49) | (A2.w[0] >> (64 - 49)); | |
488 | CA2.w[0] = A2.w[0] << 49; | |
489 | ||
490 | __sub_borrow_out (CA4.w[0], carry64, CA4.w[0], CA2.w[0]); | |
491 | __sub_borrow_in_out (CA4.w[1], carry64, CA4.w[1], CA2.w[1], | |
492 | carry64); | |
493 | CA4.w[2] = CA4.w[2] - CA2.w[2] - carry64; | |
494 | ||
495 | CQT.w[1] = Q >> (64 - 49); | |
496 | CQT.w[0] = Q << 49; | |
497 | __add_128_128 (CQ, CQ, CQT); | |
498 | ||
499 | lx = ((double) CA4.w[2] * d128 + | |
500 | ((double) CA4.w[1] * t64.d + (double) CA4.w[0])); | |
501 | lq = lx / ly; | |
502 | } | |
503 | ||
504 | Q = (UINT64) lq; | |
200359e8 L |
505 | __mul_64x64_to_128 (A2, Q, CY.w[0]); |
506 | A2.w[1] += Q * CY.w[1]; | |
507 | ||
508 | __sub_128_128 (CA4, CA4, A2); | |
509 | if ((SINT64) CA4.w[1] < 0) { | |
510 | Q--; | |
511 | CA4.w[0] += CY.w[0]; | |
512 | if (CA4.w[0] < CY.w[0]) | |
513 | CA4.w[1]++; | |
514 | CA4.w[1] += CY.w[1]; | |
515 | if ((SINT64) CA4.w[1] < 0) { | |
516 | Q--; | |
517 | CA4.w[0] += CY.w[0]; | |
518 | if (CA4.w[0] < CY.w[0]) | |
519 | CA4.w[1]++; | |
520 | CA4.w[1] += CY.w[1]; | |
521 | } | |
522 | } else if (__unsigned_compare_ge_128 (CA4, CY)) { | |
523 | Q++; | |
524 | __sub_128_128 (CA4, CA4, CY); | |
525 | } | |
526 | ||
527 | __add_128_64 (CQ, CQ, Q); | |
528 | ||
529 | pCQ->w[1] = CQ.w[1]; | |
530 | pCQ->w[0] = CQ.w[0]; | |
531 | pCA4->w[1] = CA4.w[1]; | |
532 | pCA4->w[0] = CA4.w[0]; | |
533 | return; | |
534 | ||
535 | ||
536 | ||
537 | } | |
538 | ||
539 | #endif | |
540 | #endif |