]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/ieee754/dbl-64/mpa.c
Use intermediate variable to compute exponent in __mul
[thirdparty/glibc.git] / sysdeps / ieee754 / dbl-64 / mpa.c
CommitLineData
e4d82761
UD
1/*
2 * IBM Accurate Mathematical Library
aeb25823 3 * written by International Business Machines Corp.
568035b7 4 * Copyright (C) 2001-2013 Free Software Foundation, Inc.
e4d82761
UD
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published by
cc7375ce 8 * the Free Software Foundation; either version 2.1 of the License, or
e4d82761 9 * (at your option) any later version.
50944bca 10 *
e4d82761
UD
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
c6c6dd48 14 * GNU Lesser General Public License for more details.
e4d82761
UD
15 *
16 * You should have received a copy of the GNU Lesser General Public License
59ba27a6 17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
e4d82761
UD
18 */
19/************************************************************************/
20/* MODULE_NAME: mpa.c */
21/* */
22/* FUNCTIONS: */
23/* mcr */
24/* acr */
e4d82761 25/* cpy */
e4d82761
UD
26/* norm */
27/* denorm */
28/* mp_dbl */
29/* dbl_mp */
30/* add_magnitudes */
31/* sub_magnitudes */
32/* add */
33/* sub */
34/* mul */
35/* inv */
36/* dvd */
37/* */
38/* Arithmetic functions for multiple precision numbers. */
39/* Relative errors are bounded */
40/************************************************************************/
41
42
43#include "endian.h"
44#include "mpa.h"
950c99ca 45#include <sys/param.h>
31d3cc00
UD
46
47#ifndef SECTION
48# define SECTION
49#endif
50
b76eb5f0
SP
51#ifndef NO__CONST
52const mp_no mpone = {1, {1.0, 1.0}};
da08f647 53const mp_no mptwo = {1, {1.0, 2.0}};
b76eb5f0
SP
54#endif
55
31d3cc00 56#ifndef NO___ACR
950c99ca
SP
57/* Compare mantissa of two multiple precision numbers regardless of the sign
58 and exponent of the numbers. */
31d3cc00 59static int
1066a534
SP
60mcr (const mp_no *x, const mp_no *y, int p)
61{
e4d82761 62 int i;
1066a534
SP
63 for (i = 1; i <= p; i++)
64 {
65 if (X[i] == Y[i])
66 continue;
67 else if (X[i] > Y[i])
68 return 1;
69 else
70 return -1;
71 }
e4d82761
UD
72 return 0;
73}
74
950c99ca 75/* Compare the absolute values of two multiple precision numbers. */
31d3cc00 76int
1066a534
SP
77__acr (const mp_no *x, const mp_no *y, int p)
78{
e4d82761
UD
79 int i;
80
1066a534
SP
81 if (X[0] == ZERO)
82 {
83 if (Y[0] == ZERO)
84 i = 0;
85 else
86 i = -1;
87 }
88 else if (Y[0] == ZERO)
89 i = 1;
90 else
91 {
92 if (EX > EY)
93 i = 1;
94 else if (EX < EY)
95 i = -1;
96 else
97 i = mcr (x, y, p);
98 }
e4d82761
UD
99
100 return i;
101}
31d3cc00 102#endif
e4d82761 103
af968f62 104#ifndef NO___CPY
950c99ca
SP
105/* Copy multiple precision number X into Y. They could be the same
106 number. */
1066a534
SP
107void
108__cpy (const mp_no *x, mp_no *y, int p)
109{
e4d82761 110 EY = EX;
1066a534
SP
111 for (int i = 0; i <= p; i++)
112 Y[i] = X[i];
e4d82761 113}
af968f62 114#endif
e4d82761 115
af968f62 116#ifndef NO___MP_DBL
950c99ca
SP
117/* Convert a multiple precision number *X into a double precision
118 number *Y, normalized case (|x| >= 2**(-1022))). */
1066a534
SP
119static void
120norm (const mp_no *x, double *y, int p)
50944bca 121{
1066a534 122#define R RADIXI
50944bca 123 int i;
1066a534
SP
124 double a, c, u, v, z[5];
125 if (p < 5)
126 {
127 if (p == 1)
128 c = X[1];
129 else if (p == 2)
130 c = X[1] + R * X[2];
131 else if (p == 3)
132 c = X[1] + R * (X[2] + R * X[3]);
133 else if (p == 4)
134 c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]);
e4d82761 135 }
1066a534
SP
136 else
137 {
138 for (a = ONE, z[1] = X[1]; z[1] < TWO23;)
139 {
140 a *= TWO;
141 z[1] *= TWO;
142 }
e4d82761 143
1066a534
SP
144 for (i = 2; i < 5; i++)
145 {
146 z[i] = X[i] * a;
147 u = (z[i] + CUTTER) - CUTTER;
148 if (u > z[i])
149 u -= RADIX;
150 z[i] -= u;
151 z[i - 1] += u * RADIXI;
152 }
153
154 u = (z[3] + TWO71) - TWO71;
155 if (u > z[3])
156 u -= TWO19;
157 v = z[3] - u;
158
159 if (v == TWO18)
160 {
161 if (z[4] == ZERO)
162 {
163 for (i = 5; i <= p; i++)
164 {
165 if (X[i] == ZERO)
166 continue;
167 else
168 {
169 z[3] += ONE;
170 break;
171 }
172 }
173 }
174 else
175 z[3] += ONE;
176 }
177
178 c = (z[1] + R * (z[2] + R * z[3])) / a;
179 }
e4d82761
UD
180
181 c *= X[0];
182
1066a534
SP
183 for (i = 1; i < EX; i++)
184 c *= RADIX;
185 for (i = 1; i > EX; i--)
186 c *= RADIXI;
e4d82761
UD
187
188 *y = c;
e4d82761
UD
189#undef R
190}
191
950c99ca
SP
192/* Convert a multiple precision number *X into a double precision
193 number *Y, Denormal case (|x| < 2**(-1022))). */
1066a534
SP
194static void
195denorm (const mp_no *x, double *y, int p)
50944bca 196{
1066a534
SP
197 int i, k;
198 double c, u, z[5];
e4d82761 199
99136f82 200#define R RADIXI
1066a534
SP
201 if (EX < -44 || (EX == -44 && X[1] < TWO5))
202 {
203 *y = ZERO;
204 return;
205 }
206
207 if (p == 1)
208 {
209 if (EX == -42)
210 {
211 z[1] = X[1] + TWO10;
212 z[2] = ZERO;
213 z[3] = ZERO;
214 k = 3;
215 }
216 else if (EX == -43)
217 {
218 z[1] = TWO10;
219 z[2] = X[1];
220 z[3] = ZERO;
221 k = 2;
222 }
223 else
224 {
225 z[1] = TWO10;
226 z[2] = ZERO;
227 z[3] = X[1];
228 k = 1;
229 }
230 }
231 else if (p == 2)
232 {
233 if (EX == -42)
234 {
235 z[1] = X[1] + TWO10;
236 z[2] = X[2];
237 z[3] = ZERO;
238 k = 3;
239 }
240 else if (EX == -43)
241 {
242 z[1] = TWO10;
243 z[2] = X[1];
244 z[3] = X[2];
245 k = 2;
246 }
247 else
248 {
249 z[1] = TWO10;
250 z[2] = ZERO;
251 z[3] = X[1];
252 k = 1;
253 }
254 }
255 else
256 {
257 if (EX == -42)
258 {
259 z[1] = X[1] + TWO10;
260 z[2] = X[2];
261 k = 3;
262 }
263 else if (EX == -43)
264 {
265 z[1] = TWO10;
266 z[2] = X[1];
267 k = 2;
268 }
269 else
270 {
271 z[1] = TWO10;
272 z[2] = ZERO;
273 k = 1;
274 }
275 z[3] = X[k];
276 }
e4d82761
UD
277
278 u = (z[3] + TWO57) - TWO57;
1066a534
SP
279 if (u > z[3])
280 u -= TWO5;
e4d82761 281
1066a534
SP
282 if (u == z[3])
283 {
284 for (i = k + 1; i <= p; i++)
285 {
286 if (X[i] == ZERO)
287 continue;
288 else
289 {
290 z[3] += ONE;
291 break;
292 }
293 }
e4d82761 294 }
e4d82761 295
1066a534 296 c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10);
e4d82761 297
1066a534 298 *y = c * TWOM1032;
e4d82761
UD
299#undef R
300}
301
950c99ca
SP
302/* Convert multiple precision number *X into double precision number *Y. The
303 result is correctly rounded to the nearest/even. */
1066a534
SP
304void
305__mp_dbl (const mp_no *x, double *y, int p)
306{
307 if (X[0] == ZERO)
308 {
309 *y = ZERO;
310 return;
311 }
e4d82761 312
18ea052c 313 if (__glibc_likely (EX > -42 || (EX == -42 && X[1] >= TWO10)))
1066a534 314 norm (x, y, p);
18ea052c 315 else
1066a534 316 denorm (x, y, p);
e4d82761 317}
af968f62 318#endif
e4d82761 319
950c99ca
SP
320/* Get the multiple precision equivalent of X into *Y. If the precision is too
321 small, the result is truncated. */
31d3cc00
UD
322void
323SECTION
1066a534
SP
324__dbl_mp (double x, mp_no *y, int p)
325{
326 int i, n;
e4d82761
UD
327 double u;
328
950c99ca 329 /* Sign. */
1066a534
SP
330 if (x == ZERO)
331 {
332 Y[0] = ZERO;
333 return;
334 }
335 else if (x > ZERO)
336 Y[0] = ONE;
337 else
338 {
339 Y[0] = MONE;
340 x = -x;
341 }
e4d82761 342
950c99ca 343 /* Exponent. */
1066a534
SP
344 for (EY = ONE; x >= RADIX; EY += ONE)
345 x *= RADIXI;
346 for (; x < ONE; EY -= ONE)
347 x *= RADIX;
e4d82761 348
950c99ca 349 /* Digits. */
1066a534
SP
350 n = MIN (p, 4);
351 for (i = 1; i <= n; i++)
352 {
353 u = (x + TWO52) - TWO52;
354 if (u > x)
355 u -= ONE;
356 Y[i] = u;
357 x -= u;
358 x *= RADIX;
359 }
360 for (; i <= p; i++)
361 Y[i] = ZERO;
e4d82761
UD
362}
363
950c99ca
SP
364/* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0. The
365 sign of the sum *Z is not changed. X and Y may overlap but not X and Z or
366 Y and Z. No guard digit is used. The result equals the exact sum,
367 truncated. */
31d3cc00
UD
368static void
369SECTION
1066a534
SP
370add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
371{
372 int i, j, k;
bdf02814 373 double zk;
e4d82761
UD
374
375 EZ = EX;
376
1066a534
SP
377 i = p;
378 j = p + EY - EX;
379 k = p + 1;
380
bdf02814 381 if (__glibc_unlikely (j < 1))
1066a534
SP
382 {
383 __cpy (x, z, p);
384 return;
385 }
bdf02814
SP
386
387 zk = ZERO;
1066a534
SP
388
389 for (; j > 0; i--, j--)
390 {
bdf02814
SP
391 zk += X[i] + Y[j];
392 if (zk >= RADIX)
1066a534 393 {
bdf02814
SP
394 Z[k--] = zk - RADIX;
395 zk = ONE;
1066a534
SP
396 }
397 else
bdf02814
SP
398 {
399 Z[k--] = zk;
400 zk = ZERO;
401 }
1066a534
SP
402 }
403
404 for (; i > 0; i--)
405 {
bdf02814
SP
406 zk += X[i];
407 if (zk >= RADIX)
1066a534 408 {
bdf02814
SP
409 Z[k--] = zk - RADIX;
410 zk = ONE;
1066a534
SP
411 }
412 else
bdf02814
SP
413 {
414 Z[k--] = zk;
415 zk = ZERO;
416 }
1066a534
SP
417 }
418
bdf02814 419 if (zk == ZERO)
1066a534
SP
420 {
421 for (i = 1; i <= p; i++)
422 Z[i] = Z[i + 1];
423 }
424 else
bdf02814
SP
425 {
426 Z[1] = zk;
427 EZ += ONE;
428 }
e4d82761
UD
429}
430
950c99ca
SP
431/* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
432 The sign of the difference *Z is not changed. X and Y may overlap but not X
433 and Z or Y and Z. One guard digit is used. The error is less than one
434 ULP. */
31d3cc00
UD
435static void
436SECTION
1066a534
SP
437sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
438{
439 int i, j, k;
bdf02814 440 double zk;
e4d82761
UD
441
442 EZ = EX;
bdf02814
SP
443 i = p;
444 j = p + EY - EX;
445 k = p;
e4d82761 446
bdf02814
SP
447 /* Y is too small compared to X, copy X over to the result. */
448 if (__glibc_unlikely (j < 1))
1066a534 449 {
bdf02814
SP
450 __cpy (x, z, p);
451 return;
1066a534 452 }
bdf02814
SP
453
454 /* The relevant least significant digit in Y is non-zero, so we factor it in
455 to enhance accuracy. */
456 if (j < p && Y[j + 1] > ZERO)
1066a534 457 {
bdf02814
SP
458 Z[k + 1] = RADIX - Y[j + 1];
459 zk = MONE;
1066a534 460 }
bdf02814
SP
461 else
462 zk = Z[k + 1] = ZERO;
1066a534 463
bdf02814 464 /* Subtract and borrow. */
1066a534
SP
465 for (; j > 0; i--, j--)
466 {
bdf02814
SP
467 zk += (X[i] - Y[j]);
468 if (zk < ZERO)
1066a534 469 {
bdf02814
SP
470 Z[k--] = zk + RADIX;
471 zk = MONE;
1066a534
SP
472 }
473 else
bdf02814
SP
474 {
475 Z[k--] = zk;
476 zk = ZERO;
477 }
1066a534
SP
478 }
479
bdf02814 480 /* We're done with digits from Y, so it's just digits in X. */
1066a534
SP
481 for (; i > 0; i--)
482 {
bdf02814
SP
483 zk += X[i];
484 if (zk < ZERO)
1066a534 485 {
bdf02814
SP
486 Z[k--] = zk + RADIX;
487 zk = MONE;
1066a534
SP
488 }
489 else
bdf02814
SP
490 {
491 Z[k--] = zk;
492 zk = ZERO;
493 }
1066a534
SP
494 }
495
bdf02814 496 /* Normalize. */
1066a534 497 for (i = 1; Z[i] == ZERO; i++);
e4d82761 498 EZ = EZ - i + 1;
1066a534 499 for (k = 1; i <= p + 1;)
e4d82761 500 Z[k++] = Z[i++];
1066a534 501 for (; k <= p;)
e4d82761 502 Z[k++] = ZERO;
e4d82761
UD
503}
504
950c99ca
SP
505/* Add *X and *Y and store the result in *Z. X and Y may overlap, but not X
506 and Z or Y and Z. One guard digit is used. The error is less than one
507 ULP. */
31d3cc00
UD
508void
509SECTION
1066a534
SP
510__add (const mp_no *x, const mp_no *y, mp_no *z, int p)
511{
e4d82761
UD
512 int n;
513
1066a534
SP
514 if (X[0] == ZERO)
515 {
516 __cpy (y, z, p);
517 return;
518 }
519 else if (Y[0] == ZERO)
520 {
521 __cpy (x, z, p);
522 return;
523 }
524
525 if (X[0] == Y[0])
526 {
527 if (__acr (x, y, p) > 0)
528 {
529 add_magnitudes (x, y, z, p);
530 Z[0] = X[0];
531 }
532 else
533 {
534 add_magnitudes (y, x, z, p);
535 Z[0] = Y[0];
536 }
537 }
538 else
539 {
540 if ((n = __acr (x, y, p)) == 1)
541 {
542 sub_magnitudes (x, y, z, p);
543 Z[0] = X[0];
544 }
545 else if (n == -1)
546 {
547 sub_magnitudes (y, x, z, p);
548 Z[0] = Y[0];
549 }
550 else
551 Z[0] = ZERO;
552 }
e4d82761
UD
553}
554
950c99ca
SP
555/* Subtract *Y from *X and return the result in *Z. X and Y may overlap but
556 not X and Z or Y and Z. One guard digit is used. The error is less than
557 one ULP. */
31d3cc00
UD
558void
559SECTION
1066a534
SP
560__sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
561{
e4d82761
UD
562 int n;
563
1066a534
SP
564 if (X[0] == ZERO)
565 {
566 __cpy (y, z, p);
567 Z[0] = -Z[0];
568 return;
569 }
570 else if (Y[0] == ZERO)
571 {
572 __cpy (x, z, p);
573 return;
574 }
575
576 if (X[0] != Y[0])
577 {
578 if (__acr (x, y, p) > 0)
579 {
580 add_magnitudes (x, y, z, p);
581 Z[0] = X[0];
582 }
583 else
584 {
585 add_magnitudes (y, x, z, p);
586 Z[0] = -Y[0];
587 }
588 }
589 else
590 {
591 if ((n = __acr (x, y, p)) == 1)
592 {
593 sub_magnitudes (x, y, z, p);
594 Z[0] = X[0];
595 }
596 else if (n == -1)
597 {
598 sub_magnitudes (y, x, z, p);
599 Z[0] = -Y[0];
600 }
601 else
602 Z[0] = ZERO;
603 }
e4d82761
UD
604}
605
950c99ca
SP
606/* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X
607 and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P
608 digits. In case P > 3 the error is bounded by 1.001 ULP. */
31d3cc00
UD
609void
610SECTION
1066a534
SP
611__mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
612{
909279a5 613 int i, j, k, ip, ip2;
2a91b573 614 double u, zk;
2d0e0f29 615 const mp_no *a;
e4d82761 616
44e0d4c2
SP
617 /* Is z=0? */
618 if (__glibc_unlikely (X[0] * Y[0] == ZERO))
619 {
1066a534 620 Z[0] = ZERO;
44e0d4c2
SP
621 return;
622 }
e4d82761 623
909279a5
SP
624 /* We need not iterate through all X's and Y's since it's pointless to
625 multiply zeroes. Here, both are zero... */
626 for (ip2 = p; ip2 > 0; ip2--)
627 if (X[ip2] != ZERO || Y[ip2] != ZERO)
628 break;
44e0d4c2 629
2d0e0f29
SP
630 a = X[ip2] != ZERO ? y : x;
631
909279a5
SP
632 /* ... and here, at least one of them is still zero. */
633 for (ip = ip2; ip > 0; ip--)
2d0e0f29 634 if (a->d[ip] != ZERO)
909279a5
SP
635 break;
636
637 /* The product looks like this for p = 3 (as an example):
638
639
640 a1 a2 a3
641 x b1 b2 b3
642 -----------------------------
643 a1*b3 a2*b3 a3*b3
644 a1*b2 a2*b2 a3*b2
645 a1*b1 a2*b1 a3*b1
646
647 So our K needs to ideally be P*2, but we're limiting ourselves to P + 3
648 for P >= 3. We compute the above digits in two parts; the last P-1
649 digits and then the first P digits. The last P-1 digits are a sum of
650 products of the input digits from P to P-k where K is 0 for the least
651 significant digit and increases as we go towards the left. The product
652 term is of the form X[k]*X[P-k] as can be seen in the above example.
653
654 The first P digits are also a sum of products with the same product term,
655 except that the sum is from 1 to k. This is also evident from the above
656 example.
657
658 Another thing that becomes evident is that only the most significant
659 ip+ip2 digits of the result are non-zero, where ip and ip2 are the
660 'internal precision' of the input numbers, i.e. digits after ip and ip2
661 are all ZERO. */
662
663 k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
664
665 while (k > ip + ip2 + 1)
666 Z[k--] = ZERO;
667
668 zk = Z[k] = ZERO;
669
670 while (k > p)
44e0d4c2
SP
671 {
672 for (i = k - p, j = p; i < p + 1; i++, j--)
2a91b573 673 zk += X[i] * Y[j];
44e0d4c2 674
2a91b573
SP
675 u = (zk + CUTTER) - CUTTER;
676 if (u > zk)
44e0d4c2 677 u -= RADIX;
909279a5 678 Z[k--] = zk - u;
2a91b573 679 zk = u * RADIXI;
44e0d4c2
SP
680 }
681
909279a5 682 /* The real deal. */
44e0d4c2
SP
683 while (k > 1)
684 {
1066a534 685 for (i = 1, j = k - 1; i < k; i++, j--)
2a91b573 686 zk += X[i] * Y[j];
44e0d4c2 687
2a91b573
SP
688 u = (zk + CUTTER) - CUTTER;
689 if (u > zk)
44e0d4c2 690 u -= RADIX;
909279a5 691 Z[k--] = zk - u;
2a91b573 692 zk = u * RADIXI;
44e0d4c2 693 }
2a91b573 694 Z[k] = zk;
44e0d4c2 695
4709fe76
SP
696 /* Get the exponent sum into an intermediate variable. This is a subtle
697 optimization, where given enough registers, all operations on the exponent
698 happen in registers and the result is written out only once into EZ. */
699 int e = EX + EY;
700
950c99ca 701 /* Is there a carry beyond the most significant digit? */
44e0d4c2
SP
702 if (__glibc_unlikely (Z[1] == ZERO))
703 {
704 for (i = 1; i <= p; i++)
1066a534 705 Z[i] = Z[i + 1];
4709fe76 706 e--;
44e0d4c2 707 }
e4d82761 708
4709fe76 709 EZ = e;
e4d82761 710 Z[0] = X[0] * Y[0];
e4d82761
UD
711}
712
d6752ccd
SP
713/* Square *X and store result in *Y. X and Y may not overlap. For P in
714 [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the
715 error is bounded by 1.001 ULP. This is a faster special case of
716 multiplication. */
717void
718SECTION
719__sqr (const mp_no *x, mp_no *y, int p)
720{
721 long i, j, k, ip;
722 double u, yk;
723
724 /* Is z=0? */
725 if (__glibc_unlikely (X[0] == ZERO))
726 {
727 Y[0] = ZERO;
728 return;
729 }
730
731 /* We need not iterate through all X's since it's pointless to
732 multiply zeroes. */
733 for (ip = p; ip > 0; ip--)
734 if (X[ip] != ZERO)
735 break;
736
737 k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
738
739 while (k > 2 * ip + 1)
740 Y[k--] = ZERO;
741
742 yk = ZERO;
743
744 while (k > p)
745 {
746 double yk2 = 0.0;
747 long lim = k / 2;
748
749 if (k % 2 == 0)
750 {
751 yk += X[lim] * X[lim];
752 lim--;
753 }
754
755 for (i = k - p, j = p; i <= lim; i++, j--)
756 yk2 += X[i] * X[j];
757
758 yk += 2.0 * yk2;
759
760 u = (yk + CUTTER) - CUTTER;
761 if (u > yk)
762 u -= RADIX;
763 Y[k--] = yk - u;
764 yk = u * RADIXI;
765 }
766
767 while (k > 1)
768 {
769 double yk2 = 0.0;
770 long lim = k / 2;
771
772 if (k % 2 == 0)
773 {
774 yk += X[lim] * X[lim];
775 lim--;
776 }
777
778 for (i = 1, j = k - 1; i <= lim; i++, j--)
779 yk2 += X[i] * X[j];
780
781 yk += 2.0 * yk2;
782
783 u = (yk + CUTTER) - CUTTER;
784 if (u > yk)
785 u -= RADIX;
786 Y[k--] = yk - u;
787 yk = u * RADIXI;
788 }
789 Y[k] = yk;
790
791 /* Squares are always positive. */
792 Y[0] = 1.0;
793
4709fe76
SP
794 /* Get the exponent sum into an intermediate variable. This is a subtle
795 optimization, where given enough registers, all operations on the exponent
796 happen in registers and the result is written out only once into EZ. */
797 int e = EX * 2;
798
d6752ccd
SP
799 /* Is there a carry beyond the most significant digit? */
800 if (__glibc_unlikely (Y[1] == ZERO))
801 {
802 for (i = 1; i <= p; i++)
803 Y[i] = Y[i + 1];
4709fe76 804 e--;
d6752ccd 805 }
4709fe76
SP
806
807 EY = e;
d6752ccd
SP
808}
809
950c99ca
SP
810/* Invert *X and store in *Y. Relative error bound:
811 - For P = 2: 1.001 * R ^ (1 - P)
812 - For P = 3: 1.063 * R ^ (1 - P)
813 - For P > 3: 2.001 * R ^ (1 - P)
e4d82761 814
950c99ca 815 *X = 0 is not permissible. */
1066a534 816static void
31d3cc00 817SECTION
1066a534
SP
818__inv (const mp_no *x, mp_no *y, int p)
819{
50944bca 820 int i;
e4d82761 821 double t;
1066a534
SP
822 mp_no z, w;
823 static const int np1[] =
824 { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
825 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
826 };
827
828 __cpy (x, &z, p);
829 z.e = 0;
830 __mp_dbl (&z, &t, p);
831 t = ONE / t;
832 __dbl_mp (t, y, p);
833 EY -= EX;
834
835 for (i = 0; i < np1[p]; i++)
836 {
837 __cpy (y, &w, p);
838 __mul (x, &w, y, p);
839 __sub (&mptwo, y, &z, p);
840 __mul (&w, &z, y, p);
841 }
e4d82761
UD
842}
843
950c99ca
SP
844/* Divide *X by *Y and store result in *Z. X and Y may overlap but not X and Z
845 or Y and Z. Relative error bound:
846 - For P = 2: 2.001 * R ^ (1 - P)
847 - For P = 3: 2.063 * R ^ (1 - P)
848 - For P > 3: 3.001 * R ^ (1 - P)
e4d82761 849
950c99ca 850 *X = 0 is not permissible. */
31d3cc00
UD
851void
852SECTION
1066a534
SP
853__dvd (const mp_no *x, const mp_no *y, mp_no *z, int p)
854{
e4d82761
UD
855 mp_no w;
856
1066a534
SP
857 if (X[0] == ZERO)
858 Z[0] = ZERO;
859 else
860 {
861 __inv (y, &w, p);
862 __mul (x, &w, z, p);
863 }
e4d82761 864}