/* Copyright (C) 2007-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#ifndef _DIV_MACROS_H_
#define _DIV_MACROS_H_

#include "bid_internal.h"

#define FENCE __fence
//#define FENCE

//#define DOUBLE_EXTENDED_ON

#if DOUBLE_EXTENDED_ON

37__BID_INLINE__ void
38__div_128_by_128 (UINT128 * pCQ, UINT128 * pCR, UINT128 CX, UINT128 CY) {
39 UINT128 CB, CB2, CB4, CB8, CQB, CA;
40 int_double d64, dm64, ds;
41 int_float t64;
42 double dx, dq, dqh;
b2a00c89 43 BINARY80 lq, lx, ly;
200359e8
L
44 UINT64 Rh, R, B2, B4, Ph, Ql, Ql2, carry, Qh;
45
46 if (!CY.w[1]) {
47 pCR->w[1] = 0;
48
49 if (!CX.w[1]) {
50 pCQ->w[0] = CX.w[0] / CY.w[0];
51 pCQ->w[1] = 0;
52 pCR->w[1] = 0;
53 pCR->w[0] = CX.w[0] - pCQ->w[0] * CY.w[0];
54 } else {
55
56 // This path works for CX<2^116 only
57
58 // 2^64
59 d64.i = 0x43f0000000000000;
60 // 2^64
61 dm64.i = 0x3bf0000000000000;
62 // 1.5*2^(-52)
63 ds.i = 0x3cb8000000000000;
b2a00c89
L
64 dx = (BINARY80) CX.w[1] * d64.d + (BINARY80) CX.w[0];
65 dq = dx / (BINARY80) CY.w[0];
200359e8
L
66 dq -= dq * (ds.d);
67 dqh = dq * dm64.d;
68 Qh = (UINT64) dqh;
69 Ql = (UINT64) (dq - ((double) Qh) * d64.d);
70
71 Rh = CX.w[0] - Ql * CY.w[0];
72 Ql2 = Rh / CY.w[0];
73 pCR->w[0] = Rh - Ql2 * CY.w[0];
74 __add_carry_out ((pCQ->w[0]), carry, Ql, Ql2);
75 pCQ->w[1] = Qh + carry;
76
77 }
78 return;
79 }
80 // now CY.w[1] > 0
81
82 // 2^64
83 t64.i = 0x5f800000;
b2a00c89
L
84 lx = (BINARY80) CX.w[1] * (BINARY80) t64.d + (BINARY80) CX.w[0];
85 ly = (BINARY80) CY.w[1] * (BINARY80) t64.d + (BINARY80) CY.w[0];
200359e8
L
86 lq = lx / ly;
87 pCQ->w[0] = (UINT64) lq;
88
89 pCQ->w[1] = 0;
90
91 if (!pCQ->w[0]) {
92 /*if(__unsigned_compare_ge_128(CX,CY))
93 {
94 pCQ->w[0] = 1;
95 __sub_128_128((*pCR), CX, CY);
96 }
97 else */
98 {
99 pCR->w[1] = CX.w[1];
100 pCR->w[0] = CX.w[0];
101 }
102 return;
103 }
104
105 if (CY.w[1] >= 16 || pCQ->w[0] <= 0x1000000000000000ull) {
106 pCQ->w[0] = (UINT64) lq - 1;
107 __mul_64x128_full (Ph, CQB, (pCQ->w[0]), CY);
108 __sub_128_128 (CA, CX, CQB);
109 if (__unsigned_compare_ge_128 (CA, CY)) {
110 __sub_128_128 (CA, CA, CY);
111 pCQ->w[0]++;
112 if (__unsigned_compare_ge_128 (CA, CY)) {
113 __sub_128_128 (CA, CA, CY);
114 pCQ->w[0]++;
115 }
116 }
117 pCR->w[1] = CA.w[1];
118 pCR->w[0] = CA.w[0];
119 } else {
120 pCQ->w[0] = (UINT64) lq - 6;
121
122 __mul_64x128_full (Ph, CQB, (pCQ->w[0]), CY);
123 __sub_128_128 (CA, CX, CQB);
124
125 CB8.w[1] = (CY.w[1] << 3) | (CY.w[0] >> 61);
126 CB8.w[0] = CY.w[0] << 3;
127 CB4.w[1] = (CY.w[1] << 2) | (CY.w[0] >> 62);
128 CB4.w[0] = CY.w[0] << 2;
129 CB2.w[1] = (CY.w[1] << 1) | (CY.w[0] >> 63);
130 CB2.w[0] = CY.w[0] << 1;
131
132 if (__unsigned_compare_ge_128 (CA, CB8)) {
133 pCQ->w[0] += 8;
134 __sub_128_128 (CA, CA, CB8);
135 }
136 if (__unsigned_compare_ge_128 (CA, CB4)) {
137 pCQ->w[0] += 4;
138 __sub_128_128 (CA, CA, CB4);
139 }
140 if (__unsigned_compare_ge_128 (CA, CB2)) {
141 pCQ->w[0] += 2;
142 __sub_128_128 (CA, CA, CB2);
143 }
144 if (__unsigned_compare_ge_128 (CA, CY)) {
145 pCQ->w[0] += 1;
146 __sub_128_128 (CA, CA, CY);
147 }
148
149 pCR->w[1] = CA.w[1];
150 pCR->w[0] = CA.w[0];
151 }
152}
153
154
155
156
157
158
159__BID_INLINE__ void
160__div_256_by_128 (UINT128 * pCQ, UINT256 * pCA4, UINT128 CY) {
161 UINT256 CQ2Y;
162 UINT128 CQ2, CQ3Y;
163 UINT64 Q3, carry64;
164 int_double d64;
b2a00c89 165 BINARY80 lx, ly, lq, l64, l128;
200359e8
L
166
167 // 2^64
168 d64.i = 0x43f0000000000000ull;
b2a00c89 169 l64 = (BINARY80) d64.d;
200359e8
L
170 // 2^128
171 l128 = l64 * l64;
172
173 lx =
b2a00c89
L
174 ((BINARY80) (*pCA4).w[3] * l64 +
175 (BINARY80) (*pCA4).w[2]) * l128 +
176 (BINARY80) (*pCA4).w[1] * l64 + (BINARY80) (*pCA4).w[0];
177 ly = (BINARY80) CY.w[1] * l128 + (BINARY80) CY.w[0] * l64;
200359e8
L
178
179 lq = lx / ly;
180 CQ2.w[1] = (UINT64) lq;
181 lq = (lq - CQ2.w[1]) * l64;
182 CQ2.w[0] = (UINT64) lq;
183
184 // CQ2*CY
185 __mul_128x128_to_256 (CQ2Y, CY, CQ2);
186
187 // CQ2Y <= (*pCA4) ?
188 if (CQ2Y.w[3] < (*pCA4).w[3]
189 || (CQ2Y.w[3] == (*pCA4).w[3]
190 && (CQ2Y.w[2] < (*pCA4).w[2]
191 || (CQ2Y.w[2] == (*pCA4).w[2]
192 && (CQ2Y.w[1] < (*pCA4).w[1]
193 || (CQ2Y.w[1] == (*pCA4).w[1]
194 && (CQ2Y.w[0] <= (*pCA4).w[0]))))))) {
195
196 // (*pCA4) -CQ2Y, guaranteed below 5*2^49*CY < 5*2^(49+128)
197 __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CQ2Y.w[0]);
198 __sub_borrow_in_out ((*pCA4).w[1], carry64, (*pCA4).w[1], CQ2Y.w[1],
199 carry64);
200 (*pCA4).w[2] = (*pCA4).w[2] - CQ2Y.w[2] - carry64;
201
b2a00c89
L
202 lx = ((BINARY80) (*pCA4).w[2] * l128 +
203 ((BINARY80) (*pCA4).w[1] * l64 +
204 (BINARY80) (*pCA4).w[0])) * l64;
200359e8
L
205 lq = lx / ly;
206 Q3 = (UINT64) lq;
207
208 if (Q3) {
209 Q3--;
210 __mul_64x128_short (CQ3Y, Q3, CY);
211 __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CQ3Y.w[0]);
212 (*pCA4).w[1] = (*pCA4).w[1] - CQ3Y.w[1] - carry64;
213
214 if ((*pCA4).w[1] > CY.w[1]
215 || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
216 Q3++;
217 __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
218 (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
219 if ((*pCA4).w[1] > CY.w[1]
220 || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
221 Q3++;
222 __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0],
223 CY.w[0]);
224 (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
225 }
226 }
227 // add Q3 to Q2
228 __add_carry_out (CQ2.w[0], carry64, Q3, CQ2.w[0]);
229 CQ2.w[1] += carry64;
230 }
231 } else {
232 // CQ2Y - (*pCA4), guaranteed below 5*2^(49+128)
233 __sub_borrow_out ((*pCA4).w[0], carry64, CQ2Y.w[0], (*pCA4).w[0]);
234 __sub_borrow_in_out ((*pCA4).w[1], carry64, CQ2Y.w[1], (*pCA4).w[1],
235 carry64);
236 (*pCA4).w[2] = CQ2Y.w[2] - (*pCA4).w[2] - carry64;
237
238 lx =
b2a00c89
L
239 ((BINARY80) (*pCA4).w[2] * l128 +
240 (BINARY80) (*pCA4).w[1] * l64 + (BINARY80) (*pCA4).w[0]) * l64;
200359e8
L
241 lq = lx / ly;
242 Q3 = 1 + (UINT64) lq;
243
244 __mul_64x128_short (CQ3Y, Q3, CY);
245 __sub_borrow_out ((*pCA4).w[0], carry64, CQ3Y.w[0], (*pCA4).w[0]);
246 (*pCA4).w[1] = CQ3Y.w[1] - (*pCA4).w[1] - carry64;
247
248 if ((SINT64) (*pCA4).w[1] > (SINT64) CY.w[1]
249 || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
250 Q3--;
251 __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
252 (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
253 } else if ((SINT64) (*pCA4).w[1] < 0) {
254 Q3++;
255 __add_carry_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
256 (*pCA4).w[1] = (*pCA4).w[1] + CY.w[1] + carry64;
257 }
258 // subtract Q3 from Q2
259 __sub_borrow_out (CQ2.w[0], carry64, CQ2.w[0], Q3);
260 CQ2.w[1] -= carry64;
261 }
262
263 // (*pCQ) + CQ2 + carry
264 __add_carry_out ((*pCQ).w[0], carry64, CQ2.w[0], (*pCQ).w[0]);
265 (*pCQ).w[1] = (*pCQ).w[1] + CQ2.w[1] + carry64;
266
267
268}
#else

271__BID_INLINE__ void
272__div_128_by_128 (UINT128 * pCQ, UINT128 * pCR, UINT128 CX0, UINT128 CY) {
273 UINT128 CY36, CY51, CQ, A2, CX, CQT;
274 UINT64 Q;
275 int_double t64, d49, d60;
276 double lx, ly, lq;
277
278 if (!CX0.w[1] && !CY.w[1]) {
279 pCQ->w[0] = CX0.w[0] / CY.w[0];
280 pCQ->w[1] = 0;
281 pCR->w[1] = pCR->w[0] = 0;
282 pCR->w[0] = CX0.w[0] - pCQ->w[0] * CY.w[0];
283 return;
284 }
285
286 CX.w[1] = CX0.w[1];
287 CX.w[0] = CX0.w[0];
288
289 // 2^64
290 t64.i = 0x43f0000000000000ull;
291 lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
292 ly = (double) CY.w[1] * t64.d + (double) CY.w[0];
293 lq = lx / ly;
294
295 CY36.w[1] = CY.w[0] >> (64 - 36);
296 CY36.w[0] = CY.w[0] << 36;
297
298 CQ.w[1] = CQ.w[0] = 0;
299
300 // Q >= 2^100 ?
301 if (!CY.w[1] && !CY36.w[1] && (CX.w[1] >= CY36.w[0])) {
302 // then Q >= 2^100
303
304 // 2^(-60)*CX/CY
305 d60.i = 0x3c30000000000000ull;
306 lq *= d60.d;
b2a00c89 307 Q = (UINT64) lq - 4ull;
200359e8
L
308
309 // Q*CY
310 __mul_64x64_to_128 (A2, Q, CY.w[0]);
311
312 // A2 <<= 60
313 A2.w[1] = (A2.w[1] << 60) | (A2.w[0] >> (64 - 60));
314 A2.w[0] <<= 60;
315
316 __sub_128_128 (CX, CX, A2);
317
318 lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
319 lq = lx / ly;
320
321 CQ.w[1] = Q >> (64 - 60);
322 CQ.w[0] = Q << 60;
323 }
324
325
326 CY51.w[1] = (CY.w[1] << 51) | (CY.w[0] >> (64 - 51));
327 CY51.w[0] = CY.w[0] << 51;
328
329 if (CY.w[1] < (UINT64) (1 << (64 - 51))
330 && (__unsigned_compare_gt_128 (CX, CY51))) {
331 // Q > 2^51
332
333 // 2^(-49)*CX/CY
334 d49.i = 0x3ce0000000000000ull;
335 lq *= d49.d;
336
b2a00c89 337 Q = (UINT64) lq - 1ull;
200359e8
L
338
339 // Q*CY
340 __mul_64x64_to_128 (A2, Q, CY.w[0]);
341 A2.w[1] += Q * CY.w[1];
342
343 // A2 <<= 49
344 A2.w[1] = (A2.w[1] << 49) | (A2.w[0] >> (64 - 49));
345 A2.w[0] <<= 49;
346
347 __sub_128_128 (CX, CX, A2);
348
349 CQT.w[1] = Q >> (64 - 49);
350 CQT.w[0] = Q << 49;
351 __add_128_128 (CQ, CQ, CQT);
352
353 lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
354 lq = lx / ly;
355 }
356
357 Q = (UINT64) lq;
358
359 __mul_64x64_to_128 (A2, Q, CY.w[0]);
360 A2.w[1] += Q * CY.w[1];
361
362 __sub_128_128 (CX, CX, A2);
363 if ((SINT64) CX.w[1] < 0) {
364 Q--;
365 CX.w[0] += CY.w[0];
366 if (CX.w[0] < CY.w[0])
367 CX.w[1]++;
368 CX.w[1] += CY.w[1];
369 if ((SINT64) CX.w[1] < 0) {
370 Q--;
371 CX.w[0] += CY.w[0];
372 if (CX.w[0] < CY.w[0])
373 CX.w[1]++;
374 CX.w[1] += CY.w[1];
375 }
376 } else if (__unsigned_compare_ge_128 (CX, CY)) {
377 Q++;
378 __sub_128_128 (CX, CX, CY);
379 }
380
381 __add_128_64 (CQ, CQ, Q);
382
383
384 pCQ->w[1] = CQ.w[1];
385 pCQ->w[0] = CQ.w[0];
386 pCR->w[1] = CX.w[1];
387 pCR->w[0] = CX.w[0];
388 return;
389}
390
391
392__BID_INLINE__ void
393__div_256_by_128 (UINT128 * pCQ, UINT256 * pCA4, UINT128 CY) {
394 UINT256 CA4, CA2, CY51, CY36;
395 UINT128 CQ, A2, A2h, CQT;
396 UINT64 Q, carry64;
397 int_double t64, d49, d60;
398 double lx, ly, lq, d128, d192;
399
400 // the quotient is assumed to be at most 113 bits,
401 // as needed by BID128 divide routines
402
403 // initial dividend
404 CA4.w[3] = (*pCA4).w[3];
405 CA4.w[2] = (*pCA4).w[2];
406 CA4.w[1] = (*pCA4).w[1];
407 CA4.w[0] = (*pCA4).w[0];
408 CQ.w[1] = (*pCQ).w[1];
409 CQ.w[0] = (*pCQ).w[0];
410
411 // 2^64
412 t64.i = 0x43f0000000000000ull;
413 d128 = t64.d * t64.d;
414 d192 = d128 * t64.d;
415 lx = (double) CA4.w[3] * d192 + ((double) CA4.w[2] * d128 +
416 ((double) CA4.w[1] * t64.d +
417 (double) CA4.w[0]));
418 ly = (double) CY.w[1] * t64.d + (double) CY.w[0];
419 lq = lx / ly;
420
421 CY36.w[2] = CY.w[1] >> (64 - 36);
422 CY36.w[1] = (CY.w[1] << 36) | (CY.w[0] >> (64 - 36));
423 CY36.w[0] = CY.w[0] << 36;
424
425 CQ.w[1] = (*pCQ).w[1];
426 CQ.w[0] = (*pCQ).w[0];
427
428 // Q >= 2^100 ?
429 if (CA4.w[3] > CY36.w[2]
430 || (CA4.w[3] == CY36.w[2]
431 && (CA4.w[2] > CY36.w[1]
432 || (CA4.w[2] == CY36.w[1] && CA4.w[1] >= CY36.w[0])))) {
433 // 2^(-60)*CA4/CY
434 d60.i = 0x3c30000000000000ull;
435 lq *= d60.d;
b2a00c89 436 Q = (UINT64) lq - 4ull;
200359e8
L
437
438 // Q*CY
439 __mul_64x128_to_192 (CA2, Q, CY);
440
441 // CA2 <<= 60
442 // CA2.w[3] = CA2.w[2] >> (64-60);
443 CA2.w[2] = (CA2.w[2] << 60) | (CA2.w[1] >> (64 - 60));
444 CA2.w[1] = (CA2.w[1] << 60) | (CA2.w[0] >> (64 - 60));
445 CA2.w[0] <<= 60;
446
447 // CA4 -= CA2
448 __sub_borrow_out (CA4.w[0], carry64, CA4.w[0], CA2.w[0]);
449 __sub_borrow_in_out (CA4.w[1], carry64, CA4.w[1], CA2.w[1],
450 carry64);
451 CA4.w[2] = CA4.w[2] - CA2.w[2] - carry64;
452
453 lx = ((double) CA4.w[2] * d128 +
454 ((double) CA4.w[1] * t64.d + (double) CA4.w[0]));
455 lq = lx / ly;
456
457 CQT.w[1] = Q >> (64 - 60);
458 CQT.w[0] = Q << 60;
459 __add_128_128 (CQ, CQ, CQT);
460 }
461
462 CY51.w[2] = CY.w[1] >> (64 - 51);
463 CY51.w[1] = (CY.w[1] << 51) | (CY.w[0] >> (64 - 51));
464 CY51.w[0] = CY.w[0] << 51;
465
b2a00c89
L
466 if (CA4.w[2] > CY51.w[2] || ((CA4.w[2] == CY51.w[2])
467 &&
468 (__unsigned_compare_gt_128 (CA4, CY51))))
469 {
200359e8
L
470 // Q > 2^51
471
472 // 2^(-49)*CA4/CY
473 d49.i = 0x3ce0000000000000ull;
474 lq *= d49.d;
475
b2a00c89 476 Q = (UINT64) lq - 1ull;
200359e8
L
477
478 // Q*CY
479 __mul_64x64_to_128 (A2, Q, CY.w[0]);
480 __mul_64x64_to_128 (A2h, Q, CY.w[1]);
481 A2.w[1] += A2h.w[0];
482 if (A2.w[1] < A2h.w[0])
483 A2h.w[1]++;
484
485 // A2 <<= 49
486 CA2.w[2] = (A2h.w[1] << 49) | (A2.w[1] >> (64 - 49));
487 CA2.w[1] = (A2.w[1] << 49) | (A2.w[0] >> (64 - 49));
488 CA2.w[0] = A2.w[0] << 49;
489
490 __sub_borrow_out (CA4.w[0], carry64, CA4.w[0], CA2.w[0]);
491 __sub_borrow_in_out (CA4.w[1], carry64, CA4.w[1], CA2.w[1],
492 carry64);
493 CA4.w[2] = CA4.w[2] - CA2.w[2] - carry64;
494
495 CQT.w[1] = Q >> (64 - 49);
496 CQT.w[0] = Q << 49;
497 __add_128_128 (CQ, CQ, CQT);
498
499 lx = ((double) CA4.w[2] * d128 +
500 ((double) CA4.w[1] * t64.d + (double) CA4.w[0]));
501 lq = lx / ly;
502 }
503
504 Q = (UINT64) lq;
200359e8
L
505 __mul_64x64_to_128 (A2, Q, CY.w[0]);
506 A2.w[1] += Q * CY.w[1];
507
508 __sub_128_128 (CA4, CA4, A2);
509 if ((SINT64) CA4.w[1] < 0) {
510 Q--;
511 CA4.w[0] += CY.w[0];
512 if (CA4.w[0] < CY.w[0])
513 CA4.w[1]++;
514 CA4.w[1] += CY.w[1];
515 if ((SINT64) CA4.w[1] < 0) {
516 Q--;
517 CA4.w[0] += CY.w[0];
518 if (CA4.w[0] < CY.w[0])
519 CA4.w[1]++;
520 CA4.w[1] += CY.w[1];
521 }
522 } else if (__unsigned_compare_ge_128 (CA4, CY)) {
523 Q++;
524 __sub_128_128 (CA4, CA4, CY);
525 }
526
527 __add_128_64 (CQ, CQ, Q);
528
529 pCQ->w[1] = CQ.w[1];
530 pCQ->w[0] = CQ.w[0];
531 pCA4->w[1] = CA4.w[1];
532 pCA4->w[0] = CA4.w[0];
533 return;
534
535
536
537}

#endif
#endif