]>
Commit | Line | Data |
---|---|---|
6de9cd9a DN |
1 | /* Implementation of the MATMUL intrinsic |
2 | Copyright 2002 Free Software Foundation, Inc. | |
3 | Contributed by Paul Brook <paul@nowt.org> | |
4 | ||
883c9d4d | 5 | This file is part of the GNU Fortran 95 runtime library (libgfortran). |
6de9cd9a DN |
6 | |
7 | Libgfortran is free software; you can redistribute it and/or | |
8 | modify it under the terms of the GNU Lesser General Public | |
9 | License as published by the Free Software Foundation; either | |
10 | version 2.1 of the License, or (at your option) any later version. | |
11 | ||
12 | Libgfortran is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU Lesser General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU Lesser General Public | |
18 | License along with libgfor; see the file COPYING.LIB. If not, | |
19 | write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
20 | Boston, MA 02111-1307, USA. */ | |
21 | ||
22 | #include "config.h" | |
23 | #include <stdlib.h> | |
410d3bba | 24 | #include <string.h> |
6de9cd9a DN |
25 | #include <assert.h> |
26 | #include "libgfortran.h" | |
27 | ||
410d3bba VL |
28 | /* This is a C version of the following fortran pseudo-code. The key |
29 | point is the loop order -- we access all arrays column-first, which | |
30 | improves the performance enough to boost galgel spec score by 50%. | |
31 | ||
32 | DIMENSION A(M,COUNT), B(COUNT,N), C(M,N) | |
33 | C = 0 | |
34 | DO J=1,N | |
35 | DO K=1,COUNT | |
36 | DO I=1,M | |
37 | C(I,J) = C(I,J)+A(I,K)*B(K,J) | |
38 | */ | |
39 | ||
7f68c75f RH |
40 | extern void matmul_r8 (gfc_array_r8 * retarray, gfc_array_r8 * a, gfc_array_r8 * b); |
41 | export_proto(matmul_r8); | |
7d7b8bfe | 42 | |
6de9cd9a | 43 | void |
7f68c75f | 44 | matmul_r8 (gfc_array_r8 * retarray, gfc_array_r8 * a, gfc_array_r8 * b) |
6de9cd9a DN |
45 | { |
46 | GFC_REAL_8 *abase; | |
47 | GFC_REAL_8 *bbase; | |
48 | GFC_REAL_8 *dest; | |
410d3bba VL |
49 | |
50 | index_type rxstride, rystride, axstride, aystride, bxstride, bystride; | |
51 | index_type x, y, n, count, xcount, ycount; | |
6de9cd9a DN |
52 | |
53 | assert (GFC_DESCRIPTOR_RANK (a) == 2 | |
54 | || GFC_DESCRIPTOR_RANK (b) == 2); | |
883c9d4d | 55 | |
410d3bba VL |
56 | /* C[xcount,ycount] = A[xcount, count] * B[count,ycount] |
57 | ||
58 | Either A or B (but not both) can be rank 1: | |
59 | ||
60 | o One-dimensional argument A is implicitly treated as a row matrix | |
61 | dimensioned [1,count], so xcount=1. | |
62 | ||
63 | o One-dimensional argument B is implicitly treated as a column matrix | |
64 | dimensioned [count, 1], so ycount=1. | |
65 | */ | |
66 | ||
883c9d4d VL |
67 | if (retarray->data == NULL) |
68 | { | |
69 | if (GFC_DESCRIPTOR_RANK (a) == 1) | |
70 | { | |
71 | retarray->dim[0].lbound = 0; | |
72 | retarray->dim[0].ubound = b->dim[1].ubound - b->dim[1].lbound; | |
73 | retarray->dim[0].stride = 1; | |
74 | } | |
75 | else if (GFC_DESCRIPTOR_RANK (b) == 1) | |
76 | { | |
77 | retarray->dim[0].lbound = 0; | |
78 | retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound; | |
79 | retarray->dim[0].stride = 1; | |
80 | } | |
81 | else | |
82 | { | |
83 | retarray->dim[0].lbound = 0; | |
84 | retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound; | |
85 | retarray->dim[0].stride = 1; | |
86 | ||
87 | retarray->dim[1].lbound = 0; | |
88 | retarray->dim[1].ubound = b->dim[1].ubound - b->dim[1].lbound; | |
89 | retarray->dim[1].stride = retarray->dim[0].ubound+1; | |
90 | } | |
91 | ||
07d3cebe RH |
92 | retarray->data |
93 | = internal_malloc_size (sizeof (GFC_REAL_8) * size0 (retarray)); | |
883c9d4d VL |
94 | retarray->base = 0; |
95 | } | |
96 | ||
6de9cd9a DN |
97 | abase = a->data; |
98 | bbase = b->data; | |
99 | dest = retarray->data; | |
100 | ||
101 | if (retarray->dim[0].stride == 0) | |
102 | retarray->dim[0].stride = 1; | |
103 | if (a->dim[0].stride == 0) | |
104 | a->dim[0].stride = 1; | |
105 | if (b->dim[0].stride == 0) | |
106 | b->dim[0].stride = 1; | |
107 | ||
108 | ||
109 | if (GFC_DESCRIPTOR_RANK (retarray) == 1) | |
110 | { | |
410d3bba VL |
111 | /* One-dimensional result may be addressed in the code below |
112 | either as a row or a column matrix. We want both cases to | |
113 | work. */ | |
114 | rxstride = rystride = retarray->dim[0].stride; | |
6de9cd9a DN |
115 | } |
116 | else | |
117 | { | |
118 | rxstride = retarray->dim[0].stride; | |
119 | rystride = retarray->dim[1].stride; | |
120 | } | |
121 | ||
410d3bba | 122 | |
6de9cd9a DN |
123 | if (GFC_DESCRIPTOR_RANK (a) == 1) |
124 | { | |
410d3bba VL |
125 | /* Treat it as a a row matrix A[1,count]. */ |
126 | axstride = a->dim[0].stride; | |
127 | aystride = 1; | |
128 | ||
6de9cd9a | 129 | xcount = 1; |
410d3bba | 130 | count = a->dim[0].ubound + 1 - a->dim[0].lbound; |
6de9cd9a DN |
131 | } |
132 | else | |
133 | { | |
410d3bba VL |
134 | axstride = a->dim[0].stride; |
135 | aystride = a->dim[1].stride; | |
136 | ||
6de9cd9a | 137 | count = a->dim[1].ubound + 1 - a->dim[1].lbound; |
6de9cd9a DN |
138 | xcount = a->dim[0].ubound + 1 - a->dim[0].lbound; |
139 | } | |
410d3bba VL |
140 | |
141 | assert(count == b->dim[0].ubound + 1 - b->dim[0].lbound); | |
142 | ||
6de9cd9a DN |
143 | if (GFC_DESCRIPTOR_RANK (b) == 1) |
144 | { | |
410d3bba VL |
145 | /* Treat it as a column matrix B[count,1] */ |
146 | bxstride = b->dim[0].stride; | |
147 | ||
148 | /* bystride should never be used for 1-dimensional b. | |
149 | in case it is we want it to cause a segfault, rather than | |
150 | an incorrect result. */ | |
151 | bystride = 0xDEADBEEF; | |
6de9cd9a DN |
152 | ycount = 1; |
153 | } | |
154 | else | |
155 | { | |
410d3bba VL |
156 | bxstride = b->dim[0].stride; |
157 | bystride = b->dim[1].stride; | |
6de9cd9a DN |
158 | ycount = b->dim[1].ubound + 1 - b->dim[1].lbound; |
159 | } | |
160 | ||
410d3bba VL |
161 | assert (a->base == 0); |
162 | assert (b->base == 0); | |
163 | assert (retarray->base == 0); | |
164 | ||
165 | abase = a->data; | |
166 | bbase = b->data; | |
167 | dest = retarray->data; | |
168 | ||
169 | if (rxstride == 1 && axstride == 1 && bxstride == 1) | |
6de9cd9a | 170 | { |
410d3bba VL |
171 | GFC_REAL_8 *bbase_y; |
172 | GFC_REAL_8 *dest_y; | |
173 | GFC_REAL_8 *abase_n; | |
174 | GFC_REAL_8 bbase_yn; | |
175 | ||
176 | memset (dest, 0, (sizeof (GFC_REAL_8) * size0(retarray))); | |
177 | ||
178 | for (y = 0; y < ycount; y++) | |
179 | { | |
180 | bbase_y = bbase + y*bystride; | |
181 | dest_y = dest + y*rystride; | |
182 | for (n = 0; n < count; n++) | |
183 | { | |
184 | abase_n = abase + n*aystride; | |
185 | bbase_yn = bbase_y[n]; | |
186 | for (x = 0; x < xcount; x++) | |
187 | { | |
188 | dest_y[x] += abase_n[x] * bbase_yn; | |
189 | } | |
190 | } | |
191 | } | |
192 | } | |
193 | else | |
194 | { | |
195 | for (y = 0; y < ycount; y++) | |
196 | for (x = 0; x < xcount; x++) | |
197 | dest[x*rxstride + y*rystride] = (GFC_REAL_8)0; | |
198 | ||
199 | for (y = 0; y < ycount; y++) | |
200 | for (n = 0; n < count; n++) | |
201 | for (x = 0; x < xcount; x++) | |
202 | /* dest[x,y] += a[x,n] * b[n,y] */ | |
203 | dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride]; | |
6de9cd9a DN |
204 | } |
205 | } |