]>
git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/powerpc/power4/fpu/mpa.c
9d4d644cf9f65459fdc9a97689ed4fb06b995b93
3 * IBM Accurate Mathematical Library
4 * written by International Business Machines Corp.
5 * Copyright (C) 2001-2013 Free Software Foundation, Inc.
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, see <http://www.gnu.org/licenses/>.
21 /* Define __mul and __sqr and use the rest from generic code. */
25 #include <sysdeps/ieee754/dbl-64/mpa.c>
27 /* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X
28 and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P
29 digits. In case P > 3 the error is bounded by 1.001 ULP. */
31 __mul (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
33 long i
, i1
, i2
, j
, k
, k2
;
38 if (__glibc_unlikely (X
[0] * Y
[0] == 0))
44 /* Multiply, add and carry */
45 k2
= (p2
< 3) ? p2
+ p2
: p2
+ 3;
60 /* Rearrange this inner loop to allow the fmadd instructions to be
61 independent and execute in parallel on processors that have
62 dual symmetrical FP pipelines. */
65 /* Make sure we have at least 2 iterations. */
66 if (((i2
- i1
) & 1L) == 1L)
68 /* Handle the odd iterations case. */
69 zk2
= x
->d
[i2
- 1] * y
->d
[i1
];
73 /* Do two multiply/adds per loop iteration, using independent
74 accumulators; zk and zk2. */
75 for (i
= i1
, j
= i2
- 1; i
< i2
- 1; i
+= 2, j
-= 2)
77 zk
+= x
->d
[i
] * y
->d
[j
];
78 zk2
+= x
->d
[i
+ 1] * y
->d
[j
- 1];
80 zk
+= zk2
; /* Final sum. */
84 /* Special case when iterations is 1. */
85 zk
+= x
->d
[i1
] * y
->d
[i1
];
88 /* The original code. */
89 for (i
= i1
, j
= i2
- 1; i
< i2
; i
++, j
--)
93 u
= (zk
+ CUTTER
) - CUTTER
;
103 /* Is there a carry beyond the most significant digit? */
106 for (i
= 1; i
<= p2
; i
++)
115 /* Square *X and store result in *Y. X and Y may not overlap. For P in
116 [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the
117 error is bounded by 1.001 ULP. This is a faster special case of
120 __sqr (const mp_no
*x
, mp_no
*y
, int p
)
126 if (__glibc_unlikely (X
[0] == 0))
132 /* We need not iterate through all X's since it's pointless to
134 for (ip
= p
; ip
> 0; ip
--)
138 k
= (__glibc_unlikely (p
< 3)) ? p
+ p
: p
+ 3;
140 while (k
> 2 * ip
+ 1)
152 yk
+= X
[lim
] * X
[lim
];
156 /* In __mul, this loop (and the one within the next while loop) run
157 between a range to calculate the mantissa as follows:
159 Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
162 For X == Y, we can get away with summing halfway and doubling the
163 result. For cases where the range size is even, the mid-point needs
164 to be added separately (above). */
165 for (i
= k
- p
, j
= p
; i
<= lim
; i
++, j
--)
170 u
= (yk
+ CUTTER
) - CUTTER
;
184 yk
+= X
[lim
] * X
[lim
];
188 /* Likewise for this loop. */
189 for (i
= 1, j
= k
- 1; i
<= lim
; i
++, j
--)
194 u
= (yk
+ CUTTER
) - CUTTER
;
202 /* Squares are always positive. */
206 /* Is there a carry beyond the most significant digit? */
207 if (__glibc_unlikely (Y
[1] == 0))
209 for (i
= 1; i
<= p
; i
++)