/* Source: libgcc/config/libbid/bid_div_macros.h (GCC repository). */
1 /* Copyright (C) 2007 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 2, or (at your option) any later
8 version.
9
10 In addition to the permissions in the GNU General Public License, the
11 Free Software Foundation gives you unlimited permission to link the
12 compiled version of this file into combinations with other programs,
13 and to distribute those combinations without any restriction coming
14 from the use of this file. (The General Public License restrictions
15 do apply in other respects; for example, they cover modification of
16 the file, and distribution when not linked into a combine
17 executable.)
18
19 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
20 WARRANTY; without even the implied warranty of MERCHANTABILITY or
21 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
22 for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with GCC; see the file COPYING. If not, write to the Free
26 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
27 02110-1301, USA. */
28
#ifndef _DIV_MACROS_H_
#define _DIV_MACROS_H_

#include "bid_internal.h"

/* FENCE maps to __fence (declared in bid_internal.h -- presumably an
   evaluation-order barrier for the FP-based estimates; confirm there).
   Define it as empty via the alternative below to disable.  */
#define FENCE __fence
//#define FENCE

/* When DOUBLE_EXTENDED_ON is set, the 80-bit extended-precision
   (BINARY80) variants of the division helpers below are compiled;
   otherwise the double-based variants after the #else are used.  */
//#define DOUBLE_EXTENDED_ON

#if DOUBLE_EXTENDED_ON
42 __BID_INLINE__ void
43 __div_128_by_128 (UINT128 * pCQ, UINT128 * pCR, UINT128 CX, UINT128 CY) {
44 UINT128 CB, CB2, CB4, CB8, CQB, CA;
45 int_double d64, dm64, ds;
46 int_float t64;
47 double dx, dq, dqh;
48 BINARY80 lq, lx, ly;
49 UINT64 Rh, R, B2, B4, Ph, Ql, Ql2, carry, Qh;
50
51 if (!CY.w[1]) {
52 pCR->w[1] = 0;
53
54 if (!CX.w[1]) {
55 pCQ->w[0] = CX.w[0] / CY.w[0];
56 pCQ->w[1] = 0;
57 pCR->w[1] = 0;
58 pCR->w[0] = CX.w[0] - pCQ->w[0] * CY.w[0];
59 } else {
60
61 // This path works for CX<2^116 only
62
63 // 2^64
64 d64.i = 0x43f0000000000000;
65 // 2^64
66 dm64.i = 0x3bf0000000000000;
67 // 1.5*2^(-52)
68 ds.i = 0x3cb8000000000000;
69 dx = (BINARY80) CX.w[1] * d64.d + (BINARY80) CX.w[0];
70 dq = dx / (BINARY80) CY.w[0];
71 dq -= dq * (ds.d);
72 dqh = dq * dm64.d;
73 Qh = (UINT64) dqh;
74 Ql = (UINT64) (dq - ((double) Qh) * d64.d);
75
76 Rh = CX.w[0] - Ql * CY.w[0];
77 Ql2 = Rh / CY.w[0];
78 pCR->w[0] = Rh - Ql2 * CY.w[0];
79 __add_carry_out ((pCQ->w[0]), carry, Ql, Ql2);
80 pCQ->w[1] = Qh + carry;
81
82 }
83 return;
84 }
85 // now CY.w[1] > 0
86
87 // 2^64
88 t64.i = 0x5f800000;
89 lx = (BINARY80) CX.w[1] * (BINARY80) t64.d + (BINARY80) CX.w[0];
90 ly = (BINARY80) CY.w[1] * (BINARY80) t64.d + (BINARY80) CY.w[0];
91 lq = lx / ly;
92 pCQ->w[0] = (UINT64) lq;
93
94 pCQ->w[1] = 0;
95
96 if (!pCQ->w[0]) {
97 /*if(__unsigned_compare_ge_128(CX,CY))
98 {
99 pCQ->w[0] = 1;
100 __sub_128_128((*pCR), CX, CY);
101 }
102 else */
103 {
104 pCR->w[1] = CX.w[1];
105 pCR->w[0] = CX.w[0];
106 }
107 return;
108 }
109
110 if (CY.w[1] >= 16 || pCQ->w[0] <= 0x1000000000000000ull) {
111 pCQ->w[0] = (UINT64) lq - 1;
112 __mul_64x128_full (Ph, CQB, (pCQ->w[0]), CY);
113 __sub_128_128 (CA, CX, CQB);
114 if (__unsigned_compare_ge_128 (CA, CY)) {
115 __sub_128_128 (CA, CA, CY);
116 pCQ->w[0]++;
117 if (__unsigned_compare_ge_128 (CA, CY)) {
118 __sub_128_128 (CA, CA, CY);
119 pCQ->w[0]++;
120 }
121 }
122 pCR->w[1] = CA.w[1];
123 pCR->w[0] = CA.w[0];
124 } else {
125 pCQ->w[0] = (UINT64) lq - 6;
126
127 __mul_64x128_full (Ph, CQB, (pCQ->w[0]), CY);
128 __sub_128_128 (CA, CX, CQB);
129
130 CB8.w[1] = (CY.w[1] << 3) | (CY.w[0] >> 61);
131 CB8.w[0] = CY.w[0] << 3;
132 CB4.w[1] = (CY.w[1] << 2) | (CY.w[0] >> 62);
133 CB4.w[0] = CY.w[0] << 2;
134 CB2.w[1] = (CY.w[1] << 1) | (CY.w[0] >> 63);
135 CB2.w[0] = CY.w[0] << 1;
136
137 if (__unsigned_compare_ge_128 (CA, CB8)) {
138 pCQ->w[0] += 8;
139 __sub_128_128 (CA, CA, CB8);
140 }
141 if (__unsigned_compare_ge_128 (CA, CB4)) {
142 pCQ->w[0] += 4;
143 __sub_128_128 (CA, CA, CB4);
144 }
145 if (__unsigned_compare_ge_128 (CA, CB2)) {
146 pCQ->w[0] += 2;
147 __sub_128_128 (CA, CA, CB2);
148 }
149 if (__unsigned_compare_ge_128 (CA, CY)) {
150 pCQ->w[0] += 1;
151 __sub_128_128 (CA, CA, CY);
152 }
153
154 pCR->w[1] = CA.w[1];
155 pCR->w[0] = CA.w[0];
156 }
157 }
158
159
160
161
162
163
/* Divide the 256-bit value *pCA4 by the 128-bit value CY (BINARY80
   variant).  The computed quotient words are ADDED into *pCQ (the
   caller supplies a partial quotient), and *pCA4 is updated in place
   with the reduced remainder words.
   NOTE(review): ly is formed as CY scaled by 2^64 (CY.w[1]*2^128 +
   CY.w[0]*2^64), so the first estimate CQ2 is for *pCA4 / (CY<<64);
   verify against the BID128 callers' scaling convention.  */
__BID_INLINE__ void
__div_256_by_128 (UINT128 * pCQ, UINT256 * pCA4, UINT128 CY) {
  UINT256 CQ2Y;
  UINT128 CQ2, CQ3Y;
  UINT64 Q3, carry64;
  int_double d64;
  BINARY80 lx, ly, lq, l64, l128;

  // 2^64
  d64.i = 0x43f0000000000000ull;
  l64 = (BINARY80) d64.d;
  // 2^128
  l128 = l64 * l64;

  /* 80-bit approximations of the 256-bit dividend and scaled divisor.  */
  lx =
    ((BINARY80) (*pCA4).w[3] * l64 +
     (BINARY80) (*pCA4).w[2]) * l128 +
    (BINARY80) (*pCA4).w[1] * l64 + (BINARY80) (*pCA4).w[0];
  ly = (BINARY80) CY.w[1] * l128 + (BINARY80) CY.w[0] * l64;

  /* First 128-bit quotient estimate CQ2 (high word, then low word
     from the fractional part).  */
  lq = lx / ly;
  CQ2.w[1] = (UINT64) lq;
  lq = (lq - CQ2.w[1]) * l64;
  CQ2.w[0] = (UINT64) lq;

  // CQ2*CY
  __mul_128x128_to_256 (CQ2Y, CY, CQ2);

  // CQ2Y <= (*pCA4) ?
  if (CQ2Y.w[3] < (*pCA4).w[3]
      || (CQ2Y.w[3] == (*pCA4).w[3]
	  && (CQ2Y.w[2] < (*pCA4).w[2]
	      || (CQ2Y.w[2] == (*pCA4).w[2]
		  && (CQ2Y.w[1] < (*pCA4).w[1]
		      || (CQ2Y.w[1] == (*pCA4).w[1]
			  && (CQ2Y.w[0] <= (*pCA4).w[0]))))))) {

    /* Estimate did not overshoot: compute the positive residual.  */
    // (*pCA4) -CQ2Y, guaranteed below 5*2^49*CY < 5*2^(49+128)
    __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CQ2Y.w[0]);
    __sub_borrow_in_out ((*pCA4).w[1], carry64, (*pCA4).w[1], CQ2Y.w[1],
			 carry64);
    (*pCA4).w[2] = (*pCA4).w[2] - CQ2Y.w[2] - carry64;

    /* Second estimate Q3 from the residual (scaled by 2^64).  */
    lx = ((BINARY80) (*pCA4).w[2] * l128 +
	  ((BINARY80) (*pCA4).w[1] * l64 +
	   (BINARY80) (*pCA4).w[0])) * l64;
    lq = lx / ly;
    Q3 = (UINT64) lq;

    if (Q3) {
      /* Under-estimate Q3 by one, then add CY back at most twice.  */
      Q3--;
      __mul_64x128_short (CQ3Y, Q3, CY);
      __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CQ3Y.w[0]);
      (*pCA4).w[1] = (*pCA4).w[1] - CQ3Y.w[1] - carry64;

      if ((*pCA4).w[1] > CY.w[1]
	  || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
	Q3++;
	__sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
	(*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
	if ((*pCA4).w[1] > CY.w[1]
	    || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
	  Q3++;
	  __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0],
			    CY.w[0]);
	  (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
	}
      }
      // add Q3 to Q2
      __add_carry_out (CQ2.w[0], carry64, Q3, CQ2.w[0]);
      CQ2.w[1] += carry64;
    }
  } else {
    /* Estimate overshot: work with the negated residual and later
       SUBTRACT the correction Q3 from CQ2.  */
    // CQ2Y - (*pCA4), guaranteed below 5*2^(49+128)
    __sub_borrow_out ((*pCA4).w[0], carry64, CQ2Y.w[0], (*pCA4).w[0]);
    __sub_borrow_in_out ((*pCA4).w[1], carry64, CQ2Y.w[1], (*pCA4).w[1],
			 carry64);
    (*pCA4).w[2] = CQ2Y.w[2] - (*pCA4).w[2] - carry64;

    lx =
      ((BINARY80) (*pCA4).w[2] * l128 +
       (BINARY80) (*pCA4).w[1] * l64 + (BINARY80) (*pCA4).w[0]) * l64;
    lq = lx / ly;
    /* Round the correction up so the remainder flips nonnegative.  */
    Q3 = 1 + (UINT64) lq;

    __mul_64x128_short (CQ3Y, Q3, CY);
    __sub_borrow_out ((*pCA4).w[0], carry64, CQ3Y.w[0], (*pCA4).w[0]);
    (*pCA4).w[1] = CQ3Y.w[1] - (*pCA4).w[1] - carry64;

    /* Final one-step correction: too large (>= CY) or negative.  */
    if ((SINT64) (*pCA4).w[1] > (SINT64) CY.w[1]
	|| ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
      Q3--;
      __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
      (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
    } else if ((SINT64) (*pCA4).w[1] < 0) {
      Q3++;
      __add_carry_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
      (*pCA4).w[1] = (*pCA4).w[1] + CY.w[1] + carry64;
    }
    // subtract Q3 from Q2
    __sub_borrow_out (CQ2.w[0], carry64, CQ2.w[0], Q3);
    CQ2.w[1] -= carry64;
  }

  /* Accumulate the corrected quotient into the caller's partial
     quotient.  */
  // (*pCQ) + CQ2 + carry
  __add_carry_out ((*pCQ).w[0], carry64, CQ2.w[0], (*pCQ).w[0]);
  (*pCQ).w[1] = (*pCQ).w[1] + CQ2.w[1] + carry64;


}
274 #else
275
276 __BID_INLINE__ void
277 __div_128_by_128 (UINT128 * pCQ, UINT128 * pCR, UINT128 CX0, UINT128 CY) {
278 UINT128 CY36, CY51, CQ, A2, CX, CQT;
279 UINT64 Q;
280 int_double t64, d49, d60;
281 double lx, ly, lq;
282
283 if (!CX0.w[1] && !CY.w[1]) {
284 pCQ->w[0] = CX0.w[0] / CY.w[0];
285 pCQ->w[1] = 0;
286 pCR->w[1] = pCR->w[0] = 0;
287 pCR->w[0] = CX0.w[0] - pCQ->w[0] * CY.w[0];
288 return;
289 }
290
291 CX.w[1] = CX0.w[1];
292 CX.w[0] = CX0.w[0];
293
294 // 2^64
295 t64.i = 0x43f0000000000000ull;
296 lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
297 ly = (double) CY.w[1] * t64.d + (double) CY.w[0];
298 lq = lx / ly;
299
300 CY36.w[1] = CY.w[0] >> (64 - 36);
301 CY36.w[0] = CY.w[0] << 36;
302
303 CQ.w[1] = CQ.w[0] = 0;
304
305 // Q >= 2^100 ?
306 if (!CY.w[1] && !CY36.w[1] && (CX.w[1] >= CY36.w[0])) {
307 // then Q >= 2^100
308
309 // 2^(-60)*CX/CY
310 d60.i = 0x3c30000000000000ull;
311 lq *= d60.d;
312 Q = (UINT64) lq - 4ull;
313
314 // Q*CY
315 __mul_64x64_to_128 (A2, Q, CY.w[0]);
316
317 // A2 <<= 60
318 A2.w[1] = (A2.w[1] << 60) | (A2.w[0] >> (64 - 60));
319 A2.w[0] <<= 60;
320
321 __sub_128_128 (CX, CX, A2);
322
323 lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
324 lq = lx / ly;
325
326 CQ.w[1] = Q >> (64 - 60);
327 CQ.w[0] = Q << 60;
328 }
329
330
331 CY51.w[1] = (CY.w[1] << 51) | (CY.w[0] >> (64 - 51));
332 CY51.w[0] = CY.w[0] << 51;
333
334 if (CY.w[1] < (UINT64) (1 << (64 - 51))
335 && (__unsigned_compare_gt_128 (CX, CY51))) {
336 // Q > 2^51
337
338 // 2^(-49)*CX/CY
339 d49.i = 0x3ce0000000000000ull;
340 lq *= d49.d;
341
342 Q = (UINT64) lq - 1ull;
343
344 // Q*CY
345 __mul_64x64_to_128 (A2, Q, CY.w[0]);
346 A2.w[1] += Q * CY.w[1];
347
348 // A2 <<= 49
349 A2.w[1] = (A2.w[1] << 49) | (A2.w[0] >> (64 - 49));
350 A2.w[0] <<= 49;
351
352 __sub_128_128 (CX, CX, A2);
353
354 CQT.w[1] = Q >> (64 - 49);
355 CQT.w[0] = Q << 49;
356 __add_128_128 (CQ, CQ, CQT);
357
358 lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
359 lq = lx / ly;
360 }
361
362 Q = (UINT64) lq;
363
364 __mul_64x64_to_128 (A2, Q, CY.w[0]);
365 A2.w[1] += Q * CY.w[1];
366
367 __sub_128_128 (CX, CX, A2);
368 if ((SINT64) CX.w[1] < 0) {
369 Q--;
370 CX.w[0] += CY.w[0];
371 if (CX.w[0] < CY.w[0])
372 CX.w[1]++;
373 CX.w[1] += CY.w[1];
374 if ((SINT64) CX.w[1] < 0) {
375 Q--;
376 CX.w[0] += CY.w[0];
377 if (CX.w[0] < CY.w[0])
378 CX.w[1]++;
379 CX.w[1] += CY.w[1];
380 }
381 } else if (__unsigned_compare_ge_128 (CX, CY)) {
382 Q++;
383 __sub_128_128 (CX, CX, CY);
384 }
385
386 __add_128_64 (CQ, CQ, Q);
387
388
389 pCQ->w[1] = CQ.w[1];
390 pCQ->w[0] = CQ.w[0];
391 pCR->w[1] = CX.w[1];
392 pCR->w[0] = CX.w[0];
393 return;
394 }
395
396
397 __BID_INLINE__ void
398 __div_256_by_128 (UINT128 * pCQ, UINT256 * pCA4, UINT128 CY) {
399 UINT256 CA4, CA2, CY51, CY36;
400 UINT128 CQ, A2, A2h, CQT;
401 UINT64 Q, carry64;
402 int_double t64, d49, d60;
403 double lx, ly, lq, d128, d192;
404
405 // the quotient is assumed to be at most 113 bits,
406 // as needed by BID128 divide routines
407
408 // initial dividend
409 CA4.w[3] = (*pCA4).w[3];
410 CA4.w[2] = (*pCA4).w[2];
411 CA4.w[1] = (*pCA4).w[1];
412 CA4.w[0] = (*pCA4).w[0];
413 CQ.w[1] = (*pCQ).w[1];
414 CQ.w[0] = (*pCQ).w[0];
415
416 // 2^64
417 t64.i = 0x43f0000000000000ull;
418 d128 = t64.d * t64.d;
419 d192 = d128 * t64.d;
420 lx = (double) CA4.w[3] * d192 + ((double) CA4.w[2] * d128 +
421 ((double) CA4.w[1] * t64.d +
422 (double) CA4.w[0]));
423 ly = (double) CY.w[1] * t64.d + (double) CY.w[0];
424 lq = lx / ly;
425
426 CY36.w[2] = CY.w[1] >> (64 - 36);
427 CY36.w[1] = (CY.w[1] << 36) | (CY.w[0] >> (64 - 36));
428 CY36.w[0] = CY.w[0] << 36;
429
430 CQ.w[1] = (*pCQ).w[1];
431 CQ.w[0] = (*pCQ).w[0];
432
433 // Q >= 2^100 ?
434 if (CA4.w[3] > CY36.w[2]
435 || (CA4.w[3] == CY36.w[2]
436 && (CA4.w[2] > CY36.w[1]
437 || (CA4.w[2] == CY36.w[1] && CA4.w[1] >= CY36.w[0])))) {
438 // 2^(-60)*CA4/CY
439 d60.i = 0x3c30000000000000ull;
440 lq *= d60.d;
441 Q = (UINT64) lq - 4ull;
442
443 // Q*CY
444 __mul_64x128_to_192 (CA2, Q, CY);
445
446 // CA2 <<= 60
447 // CA2.w[3] = CA2.w[2] >> (64-60);
448 CA2.w[2] = (CA2.w[2] << 60) | (CA2.w[1] >> (64 - 60));
449 CA2.w[1] = (CA2.w[1] << 60) | (CA2.w[0] >> (64 - 60));
450 CA2.w[0] <<= 60;
451
452 // CA4 -= CA2
453 __sub_borrow_out (CA4.w[0], carry64, CA4.w[0], CA2.w[0]);
454 __sub_borrow_in_out (CA4.w[1], carry64, CA4.w[1], CA2.w[1],
455 carry64);
456 CA4.w[2] = CA4.w[2] - CA2.w[2] - carry64;
457
458 lx = ((double) CA4.w[2] * d128 +
459 ((double) CA4.w[1] * t64.d + (double) CA4.w[0]));
460 lq = lx / ly;
461
462 CQT.w[1] = Q >> (64 - 60);
463 CQT.w[0] = Q << 60;
464 __add_128_128 (CQ, CQ, CQT);
465 }
466
467 CY51.w[2] = CY.w[1] >> (64 - 51);
468 CY51.w[1] = (CY.w[1] << 51) | (CY.w[0] >> (64 - 51));
469 CY51.w[0] = CY.w[0] << 51;
470
471 if (CA4.w[2] > CY51.w[2] || ((CA4.w[2] == CY51.w[2])
472 &&
473 (__unsigned_compare_gt_128 (CA4, CY51))))
474 {
475 // Q > 2^51
476
477 // 2^(-49)*CA4/CY
478 d49.i = 0x3ce0000000000000ull;
479 lq *= d49.d;
480
481 Q = (UINT64) lq - 1ull;
482
483 // Q*CY
484 __mul_64x64_to_128 (A2, Q, CY.w[0]);
485 __mul_64x64_to_128 (A2h, Q, CY.w[1]);
486 A2.w[1] += A2h.w[0];
487 if (A2.w[1] < A2h.w[0])
488 A2h.w[1]++;
489
490 // A2 <<= 49
491 CA2.w[2] = (A2h.w[1] << 49) | (A2.w[1] >> (64 - 49));
492 CA2.w[1] = (A2.w[1] << 49) | (A2.w[0] >> (64 - 49));
493 CA2.w[0] = A2.w[0] << 49;
494
495 __sub_borrow_out (CA4.w[0], carry64, CA4.w[0], CA2.w[0]);
496 __sub_borrow_in_out (CA4.w[1], carry64, CA4.w[1], CA2.w[1],
497 carry64);
498 CA4.w[2] = CA4.w[2] - CA2.w[2] - carry64;
499
500 CQT.w[1] = Q >> (64 - 49);
501 CQT.w[0] = Q << 49;
502 __add_128_128 (CQ, CQ, CQT);
503
504 lx = ((double) CA4.w[2] * d128 +
505 ((double) CA4.w[1] * t64.d + (double) CA4.w[0]));
506 lq = lx / ly;
507 }
508
509 Q = (UINT64) lq;
510 __mul_64x64_to_128 (A2, Q, CY.w[0]);
511 A2.w[1] += Q * CY.w[1];
512
513 __sub_128_128 (CA4, CA4, A2);
514 if ((SINT64) CA4.w[1] < 0) {
515 Q--;
516 CA4.w[0] += CY.w[0];
517 if (CA4.w[0] < CY.w[0])
518 CA4.w[1]++;
519 CA4.w[1] += CY.w[1];
520 if ((SINT64) CA4.w[1] < 0) {
521 Q--;
522 CA4.w[0] += CY.w[0];
523 if (CA4.w[0] < CY.w[0])
524 CA4.w[1]++;
525 CA4.w[1] += CY.w[1];
526 }
527 } else if (__unsigned_compare_ge_128 (CA4, CY)) {
528 Q++;
529 __sub_128_128 (CA4, CA4, CY);
530 }
531
532 __add_128_64 (CQ, CQ, Q);
533
534 pCQ->w[1] = CQ.w[1];
535 pCQ->w[0] = CQ.w[0];
536 pCA4->w[1] = CA4.w[1];
537 pCA4->w[0] = CA4.w[0];
538 return;
539
540
541
542 }
543
544 #endif
545 #endif