]>
Commit | Line | Data |
---|---|---|
644cb69f | 1 | /* Implementation of the MATMUL intrinsic |
6ff24d45 | 2 | Copyright 2002, 2005, 2006 Free Software Foundation, Inc. |
644cb69f FXC |
3 | Contributed by Paul Brook <paul@nowt.org> |
4 | ||
5 | This file is part of the GNU Fortran 95 runtime library (libgfortran). | |
6 | ||
7 | Libgfortran is free software; you can redistribute it and/or | |
8 | modify it under the terms of the GNU General Public | |
9 | License as published by the Free Software Foundation; either | |
10 | version 2 of the License, or (at your option) any later version. | |
11 | ||
12 | In addition to the permissions in the GNU General Public License, the | |
13 | Free Software Foundation gives you unlimited permission to link the | |
14 | compiled version of this file into combinations with other programs, | |
15 | and to distribute those combinations without any restriction coming | |
16 | from the use of this file. (The General Public License restrictions | |
17 | do apply in other respects; for example, they cover modification of | |
18 | the file, and distribution when not linked into a combine | |
19 | executable.) | |
20 | ||
21 | Libgfortran is distributed in the hope that it will be useful, | |
22 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
23 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
24 | GNU General Public License for more details. | |
25 | ||
26 | You should have received a copy of the GNU General Public | |
27 | License along with libgfortran; see the file COPYING. If not, | |
28 | write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
29 | Boston, MA 02110-1301, USA. */ | |
30 | ||
31 | #include "config.h" | |
32 | #include <stdlib.h> | |
33 | #include <string.h> | |
34 | #include <assert.h> | |
35 | #include "libgfortran.h" | |
36 | ||
37 | #if defined (HAVE_GFC_REAL_16) | |
38 | ||
1524f80b RS |
39 | /* The order of loops is different in the case of plain matrix |
40 | multiplication C=MATMUL(A,B), and in the frequent special case where | |
41 | the argument A is the temporary result of a TRANSPOSE intrinsic: | |
42 | C=MATMUL(TRANSPOSE(A),B). Transposed temporaries are detected by | |
43 | looking at their strides. | |
44 | ||
45 | The equivalent Fortran pseudo-code is: | |
644cb69f FXC |
46 | |
47 | DIMENSION A(M,COUNT), B(COUNT,N), C(M,N) | |
1524f80b RS |
48 | IF (.NOT.IS_TRANSPOSED(A)) THEN |
49 | C = 0 | |
50 | DO J=1,N | |
51 | DO K=1,COUNT | |
52 | DO I=1,M | |
53 | C(I,J) = C(I,J)+A(I,K)*B(K,J) | |
54 | ELSE | |
55 | DO J=1,N | |
644cb69f | 56 | DO I=1,M |
1524f80b RS |
57 | S = 0 |
58 | DO K=1,COUNT | |
59 | S = S+A(I,K)+B(K,J) | |
60 | C(I,J) = S | |
61 | ENDIF | |
644cb69f FXC |
62 | */ |
63 | ||
85206901 JB |
64 | extern void matmul_r16 (gfc_array_r16 * const restrict retarray, |
65 | gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b); | |
644cb69f FXC |
66 | export_proto(matmul_r16); |
67 | ||
68 | void | |
85206901 JB |
69 | matmul_r16 (gfc_array_r16 * const restrict retarray, |
70 | gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b) | |
644cb69f | 71 | { |
85206901 JB |
72 | const GFC_REAL_16 * restrict abase; |
73 | const GFC_REAL_16 * restrict bbase; | |
74 | GFC_REAL_16 * restrict dest; | |
644cb69f FXC |
75 | |
76 | index_type rxstride, rystride, axstride, aystride, bxstride, bystride; | |
77 | index_type x, y, n, count, xcount, ycount; | |
78 | ||
79 | assert (GFC_DESCRIPTOR_RANK (a) == 2 | |
80 | || GFC_DESCRIPTOR_RANK (b) == 2); | |
81 | ||
82 | /* C[xcount,ycount] = A[xcount, count] * B[count,ycount] | |
83 | ||
84 | Either A or B (but not both) can be rank 1: | |
85 | ||
86 | o One-dimensional argument A is implicitly treated as a row matrix | |
87 | dimensioned [1,count], so xcount=1. | |
88 | ||
89 | o One-dimensional argument B is implicitly treated as a column matrix | |
90 | dimensioned [count, 1], so ycount=1. | |
91 | */ | |
92 | ||
93 | if (retarray->data == NULL) | |
94 | { | |
95 | if (GFC_DESCRIPTOR_RANK (a) == 1) | |
96 | { | |
97 | retarray->dim[0].lbound = 0; | |
98 | retarray->dim[0].ubound = b->dim[1].ubound - b->dim[1].lbound; | |
99 | retarray->dim[0].stride = 1; | |
100 | } | |
101 | else if (GFC_DESCRIPTOR_RANK (b) == 1) | |
102 | { | |
103 | retarray->dim[0].lbound = 0; | |
104 | retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound; | |
105 | retarray->dim[0].stride = 1; | |
106 | } | |
107 | else | |
108 | { | |
109 | retarray->dim[0].lbound = 0; | |
110 | retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound; | |
111 | retarray->dim[0].stride = 1; | |
112 | ||
113 | retarray->dim[1].lbound = 0; | |
114 | retarray->dim[1].ubound = b->dim[1].ubound - b->dim[1].lbound; | |
115 | retarray->dim[1].stride = retarray->dim[0].ubound+1; | |
116 | } | |
117 | ||
118 | retarray->data | |
119 | = internal_malloc_size (sizeof (GFC_REAL_16) * size0 ((array_t *) retarray)); | |
120 | retarray->offset = 0; | |
121 | } | |
122 | ||
644cb69f FXC |
123 | |
124 | if (GFC_DESCRIPTOR_RANK (retarray) == 1) | |
125 | { | |
126 | /* One-dimensional result may be addressed in the code below | |
127 | either as a row or a column matrix. We want both cases to | |
128 | work. */ | |
129 | rxstride = rystride = retarray->dim[0].stride; | |
130 | } | |
131 | else | |
132 | { | |
133 | rxstride = retarray->dim[0].stride; | |
134 | rystride = retarray->dim[1].stride; | |
135 | } | |
136 | ||
137 | ||
138 | if (GFC_DESCRIPTOR_RANK (a) == 1) | |
139 | { | |
140 | /* Treat it as a a row matrix A[1,count]. */ | |
141 | axstride = a->dim[0].stride; | |
142 | aystride = 1; | |
143 | ||
144 | xcount = 1; | |
145 | count = a->dim[0].ubound + 1 - a->dim[0].lbound; | |
146 | } | |
147 | else | |
148 | { | |
149 | axstride = a->dim[0].stride; | |
150 | aystride = a->dim[1].stride; | |
151 | ||
152 | count = a->dim[1].ubound + 1 - a->dim[1].lbound; | |
153 | xcount = a->dim[0].ubound + 1 - a->dim[0].lbound; | |
154 | } | |
155 | ||
156 | assert(count == b->dim[0].ubound + 1 - b->dim[0].lbound); | |
157 | ||
158 | if (GFC_DESCRIPTOR_RANK (b) == 1) | |
159 | { | |
160 | /* Treat it as a column matrix B[count,1] */ | |
161 | bxstride = b->dim[0].stride; | |
162 | ||
163 | /* bystride should never be used for 1-dimensional b. | |
164 | in case it is we want it to cause a segfault, rather than | |
165 | an incorrect result. */ | |
166 | bystride = 0xDEADBEEF; | |
167 | ycount = 1; | |
168 | } | |
169 | else | |
170 | { | |
171 | bxstride = b->dim[0].stride; | |
172 | bystride = b->dim[1].stride; | |
173 | ycount = b->dim[1].ubound + 1 - b->dim[1].lbound; | |
174 | } | |
175 | ||
176 | abase = a->data; | |
177 | bbase = b->data; | |
178 | dest = retarray->data; | |
179 | ||
180 | if (rxstride == 1 && axstride == 1 && bxstride == 1) | |
181 | { | |
85206901 JB |
182 | const GFC_REAL_16 * restrict bbase_y; |
183 | GFC_REAL_16 * restrict dest_y; | |
184 | const GFC_REAL_16 * restrict abase_n; | |
644cb69f FXC |
185 | GFC_REAL_16 bbase_yn; |
186 | ||
1633cb7c FXC |
187 | if (rystride == xcount) |
188 | memset (dest, 0, (sizeof (GFC_REAL_16) * xcount * ycount)); | |
644cb69f FXC |
189 | else |
190 | { | |
191 | for (y = 0; y < ycount; y++) | |
192 | for (x = 0; x < xcount; x++) | |
193 | dest[x + y*rystride] = (GFC_REAL_16)0; | |
194 | } | |
195 | ||
196 | for (y = 0; y < ycount; y++) | |
197 | { | |
198 | bbase_y = bbase + y*bystride; | |
199 | dest_y = dest + y*rystride; | |
200 | for (n = 0; n < count; n++) | |
201 | { | |
202 | abase_n = abase + n*aystride; | |
203 | bbase_yn = bbase_y[n]; | |
204 | for (x = 0; x < xcount; x++) | |
205 | { | |
206 | dest_y[x] += abase_n[x] * bbase_yn; | |
207 | } | |
208 | } | |
209 | } | |
210 | } | |
1524f80b RS |
211 | else if (rxstride == 1 && aystride == 1 && bxstride == 1) |
212 | { | |
213 | const GFC_REAL_16 *restrict abase_x; | |
214 | const GFC_REAL_16 *restrict bbase_y; | |
215 | GFC_REAL_16 *restrict dest_y; | |
216 | GFC_REAL_16 s; | |
217 | ||
218 | for (y = 0; y < ycount; y++) | |
219 | { | |
220 | bbase_y = &bbase[y*bystride]; | |
221 | dest_y = &dest[y*rystride]; | |
222 | for (x = 0; x < xcount; x++) | |
223 | { | |
224 | abase_x = &abase[x*axstride]; | |
225 | s = (GFC_REAL_16) 0; | |
226 | for (n = 0; n < count; n++) | |
227 | s += abase_x[n] * bbase_y[n]; | |
228 | dest_y[x] = s; | |
229 | } | |
230 | } | |
231 | } | |
232 | else if (axstride < aystride) | |
644cb69f FXC |
233 | { |
234 | for (y = 0; y < ycount; y++) | |
235 | for (x = 0; x < xcount; x++) | |
236 | dest[x*rxstride + y*rystride] = (GFC_REAL_16)0; | |
237 | ||
238 | for (y = 0; y < ycount; y++) | |
239 | for (n = 0; n < count; n++) | |
240 | for (x = 0; x < xcount; x++) | |
241 | /* dest[x,y] += a[x,n] * b[n,y] */ | |
242 | dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride]; | |
243 | } | |
1524f80b RS |
244 | else |
245 | { | |
246 | const GFC_REAL_16 *restrict abase_x; | |
247 | const GFC_REAL_16 *restrict bbase_y; | |
248 | GFC_REAL_16 *restrict dest_y; | |
249 | GFC_REAL_16 s; | |
250 | ||
251 | for (y = 0; y < ycount; y++) | |
252 | { | |
253 | bbase_y = &bbase[y*bystride]; | |
254 | dest_y = &dest[y*rystride]; | |
255 | for (x = 0; x < xcount; x++) | |
256 | { | |
257 | abase_x = &abase[x*axstride]; | |
258 | s = (GFC_REAL_16) 0; | |
259 | for (n = 0; n < count; n++) | |
260 | s += abase_x[n*aystride] * bbase_y[n*bxstride]; | |
261 | dest_y[x*rxstride] = s; | |
262 | } | |
263 | } | |
264 | } | |
644cb69f FXC |
265 | } |
266 | ||
267 | #endif |