]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/powerpc/power4/fpu/mpa.c
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / powerpc / power4 / fpu / mpa.c
CommitLineData
04067002
UD
1
2/*
3 * IBM Accurate Mathematical Library
4 * written by International Business Machines Corp.
04277e02 5 * Copyright (C) 2001-2019 Free Software Foundation, Inc.
04067002
UD
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
59ba27a6 18 * along with this program; if not, see <http://www.gnu.org/licenses/>.
04067002 19 */
04067002 20
82a9811d
SP
21/* Define __mul and __sqr and use the rest from generic code. */
22#define NO__MUL
23#define NO__SQR
04067002 24
82a9811d 25#include <sysdeps/ieee754/dbl-64/mpa.c>
04067002 26
950c99ca
SP
27/* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X
28 and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P
29 digits. In case P > 3 the error is bounded by 1.001 ULP. */
1066a534
SP
30void
31__mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
32{
04067002
UD
33 long i, i1, i2, j, k, k2;
34 long p2 = p;
35 double u, zk, zk2;
36
20cd7fb3 37 /* Is z=0? */
a64d7e0e 38 if (__glibc_unlikely (X[0] * Y[0] == 0))
1066a534 39 {
a64d7e0e 40 Z[0] = 0;
1066a534
SP
41 return;
42 }
04067002 43
1066a534
SP
44 /* Multiply, add and carry */
45 k2 = (p2 < 3) ? p2 + p2 : p2 + 3;
a64d7e0e 46 zk = Z[k2] = 0;
1066a534 47 for (k = k2; k > 1;)
04067002 48 {
1066a534 49 if (k > p2)
04067002 50 {
1066a534
SP
51 i1 = k - p2;
52 i2 = p2 + 1;
04067002 53 }
1066a534 54 else
04067002 55 {
1066a534
SP
56 i1 = 1;
57 i2 = k;
58 }
59#if 1
60 /* Rearrange this inner loop to allow the fmadd instructions to be
61 independent and execute in parallel on processors that have
62 dual symmetrical FP pipelines. */
63 if (i1 < (i2 - 1))
64 {
65 /* Make sure we have at least 2 iterations. */
66 if (((i2 - i1) & 1L) == 1L)
67 {
68 /* Handle the odd iterations case. */
69 zk2 = x->d[i2 - 1] * y->d[i1];
70 }
71 else
72 zk2 = 0.0;
73 /* Do two multiply/adds per loop iteration, using independent
74 accumulators; zk and zk2. */
75 for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2)
76 {
77 zk += x->d[i] * y->d[j];
78 zk2 += x->d[i + 1] * y->d[j - 1];
79 }
80 zk += zk2; /* Final sum. */
81 }
82 else
83 {
84 /* Special case when iterations is 1. */
85 zk += x->d[i1] * y->d[i1];
04067002 86 }
04067002 87#else
1066a534
SP
88 /* The original code. */
89 for (i = i1, j = i2 - 1; i < i2; i++, j--)
90 zk += X[i] * Y[j];
04067002
UD
91#endif
92
1066a534
SP
93 u = (zk + CUTTER) - CUTTER;
94 if (u > zk)
95 u -= RADIX;
96 Z[k] = zk - u;
97 zk = u * RADIXI;
98 --k;
99 }
04067002
UD
100 Z[k] = zk;
101
e6ebd4a7 102 int e = EX + EY;
950c99ca 103 /* Is there a carry beyond the most significant digit? */
a64d7e0e 104 if (Z[1] == 0)
1066a534
SP
105 {
106 for (i = 1; i <= p2; i++)
107 Z[i] = Z[i + 1];
e6ebd4a7 108 e--;
1066a534 109 }
04067002 110
e6ebd4a7 111 EZ = e;
04067002 112 Z[0] = X[0] * Y[0];
04067002
UD
113}
114
d6752ccd
SP
115/* Square *X and store result in *Y. X and Y may not overlap. For P in
116 [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the
117 error is bounded by 1.001 ULP. This is a faster special case of
118 multiplication. */
119void
120__sqr (const mp_no *x, mp_no *y, int p)
121{
122 long i, j, k, ip;
123 double u, yk;
124
125 /* Is z=0? */
a64d7e0e 126 if (__glibc_unlikely (X[0] == 0))
d6752ccd 127 {
a64d7e0e 128 Y[0] = 0;
d6752ccd
SP
129 return;
130 }
131
132 /* We need not iterate through all X's since it's pointless to
133 multiply zeroes. */
134 for (ip = p; ip > 0; ip--)
a64d7e0e 135 if (X[ip] != 0)
d6752ccd
SP
136 break;
137
138 k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
139
140 while (k > 2 * ip + 1)
a64d7e0e 141 Y[k--] = 0;
d6752ccd 142
a64d7e0e 143 yk = 0;
d6752ccd
SP
144
145 while (k > p)
146 {
147 double yk2 = 0.0;
148 long lim = k / 2;
149
150 if (k % 2 == 0)
151 {
152 yk += X[lim] * X[lim];
153 lim--;
154 }
155
156 /* In __mul, this loop (and the one within the next while loop) run
157 between a range to calculate the mantissa as follows:
158
159 Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
160 + X[n] * Y[k]
161
162 For X == Y, we can get away with summing halfway and doubling the
163 result. For cases where the range size is even, the mid-point needs
164 to be added separately (above). */
165 for (i = k - p, j = p; i <= lim; i++, j--)
166 yk2 += X[i] * X[j];
167
168 yk += 2.0 * yk2;
169
170 u = (yk + CUTTER) - CUTTER;
171 if (u > yk)
172 u -= RADIX;
173 Y[k--] = yk - u;
174 yk = u * RADIXI;
175 }
176
177 while (k > 1)
178 {
179 double yk2 = 0.0;
180 long lim = k / 2;
181
182 if (k % 2 == 0)
183 {
184 yk += X[lim] * X[lim];
185 lim--;
186 }
187
188 /* Likewise for this loop. */
189 for (i = 1, j = k - 1; i <= lim; i++, j--)
190 yk2 += X[i] * X[j];
191
192 yk += 2.0 * yk2;
193
194 u = (yk + CUTTER) - CUTTER;
195 if (u > yk)
196 u -= RADIX;
197 Y[k--] = yk - u;
198 yk = u * RADIXI;
199 }
200 Y[k] = yk;
201
202 /* Squares are always positive. */
203 Y[0] = 1.0;
204
e6ebd4a7 205 int e = EX * 2;
d6752ccd 206 /* Is there a carry beyond the most significant digit? */
a64d7e0e 207 if (__glibc_unlikely (Y[1] == 0))
d6752ccd
SP
208 {
209 for (i = 1; i <= p; i++)
210 Y[i] = Y[i + 1];
e6ebd4a7 211 e--;
d6752ccd 212 }
e6ebd4a7 213 EY = e;
d6752ccd 214}