]>
Commit | Line | Data |
---|---|---|
6de9cd9a | 1 | /* Implementation of the MATMUL intrinsic |
4b6903ec | 2 | Copyright 2002, 2005 Free Software Foundation, Inc. |
6de9cd9a DN |
3 | Contributed by Paul Brook <paul@nowt.org> |
4 | ||
883c9d4d | 5 | This file is part of the GNU Fortran 95 runtime library (libgfortran). |
6de9cd9a DN |
6 | |
7 | Libgfortran is free software; you can redistribute it and/or | |
57dea9f6 | 8 | modify it under the terms of the GNU General Public |
6de9cd9a | 9 | License as published by the Free Software Foundation; either |
57dea9f6 TM |
10 | version 2 of the License, or (at your option) any later version. |
11 | ||
12 | In addition to the permissions in the GNU General Public License, the | |
13 | Free Software Foundation gives you unlimited permission to link the | |
14 | compiled version of this file into combinations with other programs, | |
15 | and to distribute those combinations without any restriction coming | |
16 | from the use of this file. (The General Public License restrictions | |
17 | do apply in other respects; for example, they cover modification of | |
18 | the file, and distribution when not linked into a combine | |
19 | executable.) | |
6de9cd9a DN |
20 | |
21 | Libgfortran is distributed in the hope that it will be useful, | |
22 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
23 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
57dea9f6 | 24 | GNU General Public License for more details. |
6de9cd9a | 25 | |
57dea9f6 TM |
26 | You should have received a copy of the GNU General Public |
27 | License along with libgfortran; see the file COPYING. If not, | |
fe2ae685 KC |
28 | write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
29 | Boston, MA 02110-1301, USA. */ | |
6de9cd9a DN |
30 | |
31 | #include "config.h" | |
32 | #include <stdlib.h> | |
410d3bba | 33 | #include <string.h> |
6de9cd9a DN |
34 | #include <assert.h> |
35 | #include "libgfortran.h" | |
36 | ||
644cb69f FXC |
37 | #if defined (HAVE_GFC_INTEGER_8) |
38 | ||
410d3bba VL |
39 | /* This is a C version of the following fortran pseudo-code. The key |
40 | point is the loop order -- we access all arrays column-first, which | |
41 | improves the performance enough to boost galgel spec score by 50%. | |
42 | ||
43 | DIMENSION A(M,COUNT), B(COUNT,N), C(M,N) | |
44 | C = 0 | |
45 | DO J=1,N | |
46 | DO K=1,COUNT | |
47 | DO I=1,M | |
48 | C(I,J) = C(I,J)+A(I,K)*B(K,J) | |
49 | */ | |
50 | ||
85206901 JB |
51 | extern void matmul_i8 (gfc_array_i8 * const restrict retarray, |
52 | gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b); | |
7f68c75f | 53 | export_proto(matmul_i8); |
7d7b8bfe | 54 | |
6de9cd9a | 55 | void |
85206901 JB |
56 | matmul_i8 (gfc_array_i8 * const restrict retarray, |
57 | gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b) | |
6de9cd9a | 58 | { |
85206901 JB |
59 | const GFC_INTEGER_8 * restrict abase; |
60 | const GFC_INTEGER_8 * restrict bbase; | |
61 | GFC_INTEGER_8 * restrict dest; | |
410d3bba VL |
62 | |
63 | index_type rxstride, rystride, axstride, aystride, bxstride, bystride; | |
64 | index_type x, y, n, count, xcount, ycount; | |
6de9cd9a DN |
65 | |
66 | assert (GFC_DESCRIPTOR_RANK (a) == 2 | |
67 | || GFC_DESCRIPTOR_RANK (b) == 2); | |
883c9d4d | 68 | |
410d3bba VL |
69 | /* C[xcount,ycount] = A[xcount, count] * B[count,ycount] |
70 | ||
71 | Either A or B (but not both) can be rank 1: | |
72 | ||
73 | o One-dimensional argument A is implicitly treated as a row matrix | |
74 | dimensioned [1,count], so xcount=1. | |
75 | ||
76 | o One-dimensional argument B is implicitly treated as a column matrix | |
77 | dimensioned [count, 1], so ycount=1. | |
78 | */ | |
79 | ||
883c9d4d VL |
80 | if (retarray->data == NULL) |
81 | { | |
82 | if (GFC_DESCRIPTOR_RANK (a) == 1) | |
83 | { | |
84 | retarray->dim[0].lbound = 0; | |
85 | retarray->dim[0].ubound = b->dim[1].ubound - b->dim[1].lbound; | |
86 | retarray->dim[0].stride = 1; | |
87 | } | |
88 | else if (GFC_DESCRIPTOR_RANK (b) == 1) | |
89 | { | |
90 | retarray->dim[0].lbound = 0; | |
91 | retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound; | |
92 | retarray->dim[0].stride = 1; | |
93 | } | |
94 | else | |
95 | { | |
96 | retarray->dim[0].lbound = 0; | |
97 | retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound; | |
98 | retarray->dim[0].stride = 1; | |
420aa7b8 | 99 | |
883c9d4d VL |
100 | retarray->dim[1].lbound = 0; |
101 | retarray->dim[1].ubound = b->dim[1].ubound - b->dim[1].lbound; | |
102 | retarray->dim[1].stride = retarray->dim[0].ubound+1; | |
103 | } | |
420aa7b8 | 104 | |
07d3cebe | 105 | retarray->data |
4b6903ec | 106 | = internal_malloc_size (sizeof (GFC_INTEGER_8) * size0 ((array_t *) retarray)); |
efd4dc1a | 107 | retarray->offset = 0; |
883c9d4d VL |
108 | } |
109 | ||
6de9cd9a DN |
110 | if (retarray->dim[0].stride == 0) |
111 | retarray->dim[0].stride = 1; | |
85206901 JB |
112 | |
113 | /* This prevents constifying the input arguments. */ | |
6de9cd9a DN |
114 | if (a->dim[0].stride == 0) |
115 | a->dim[0].stride = 1; | |
116 | if (b->dim[0].stride == 0) | |
117 | b->dim[0].stride = 1; | |
118 | ||
119 | ||
120 | if (GFC_DESCRIPTOR_RANK (retarray) == 1) | |
121 | { | |
410d3bba VL |
122 | /* One-dimensional result may be addressed in the code below |
123 | either as a row or a column matrix. We want both cases to | |
124 | work. */ | |
125 | rxstride = rystride = retarray->dim[0].stride; | |
6de9cd9a DN |
126 | } |
127 | else | |
128 | { | |
129 | rxstride = retarray->dim[0].stride; | |
130 | rystride = retarray->dim[1].stride; | |
131 | } | |
132 | ||
410d3bba | 133 | |
6de9cd9a DN |
134 | if (GFC_DESCRIPTOR_RANK (a) == 1) |
135 | { | |
410d3bba VL |
136 | /* Treat it as a a row matrix A[1,count]. */ |
137 | axstride = a->dim[0].stride; | |
138 | aystride = 1; | |
139 | ||
6de9cd9a | 140 | xcount = 1; |
410d3bba | 141 | count = a->dim[0].ubound + 1 - a->dim[0].lbound; |
6de9cd9a DN |
142 | } |
143 | else | |
144 | { | |
410d3bba VL |
145 | axstride = a->dim[0].stride; |
146 | aystride = a->dim[1].stride; | |
147 | ||
6de9cd9a | 148 | count = a->dim[1].ubound + 1 - a->dim[1].lbound; |
6de9cd9a DN |
149 | xcount = a->dim[0].ubound + 1 - a->dim[0].lbound; |
150 | } | |
410d3bba VL |
151 | |
152 | assert(count == b->dim[0].ubound + 1 - b->dim[0].lbound); | |
153 | ||
6de9cd9a DN |
154 | if (GFC_DESCRIPTOR_RANK (b) == 1) |
155 | { | |
410d3bba VL |
156 | /* Treat it as a column matrix B[count,1] */ |
157 | bxstride = b->dim[0].stride; | |
158 | ||
159 | /* bystride should never be used for 1-dimensional b. | |
160 | in case it is we want it to cause a segfault, rather than | |
161 | an incorrect result. */ | |
420aa7b8 | 162 | bystride = 0xDEADBEEF; |
6de9cd9a DN |
163 | ycount = 1; |
164 | } | |
165 | else | |
166 | { | |
410d3bba VL |
167 | bxstride = b->dim[0].stride; |
168 | bystride = b->dim[1].stride; | |
6de9cd9a DN |
169 | ycount = b->dim[1].ubound + 1 - b->dim[1].lbound; |
170 | } | |
171 | ||
410d3bba VL |
172 | abase = a->data; |
173 | bbase = b->data; | |
174 | dest = retarray->data; | |
175 | ||
176 | if (rxstride == 1 && axstride == 1 && bxstride == 1) | |
6de9cd9a | 177 | { |
85206901 JB |
178 | const GFC_INTEGER_8 * restrict bbase_y; |
179 | GFC_INTEGER_8 * restrict dest_y; | |
180 | const GFC_INTEGER_8 * restrict abase_n; | |
410d3bba VL |
181 | GFC_INTEGER_8 bbase_yn; |
182 | ||
ae740cce TK |
183 | if (rystride == ycount) |
184 | memset (dest, 0, (sizeof (GFC_INTEGER_8) * size0((array_t *) retarray))); | |
185 | else | |
186 | { | |
187 | for (y = 0; y < ycount; y++) | |
188 | for (x = 0; x < xcount; x++) | |
189 | dest[x + y*rystride] = (GFC_INTEGER_8)0; | |
190 | } | |
410d3bba VL |
191 | |
192 | for (y = 0; y < ycount; y++) | |
193 | { | |
194 | bbase_y = bbase + y*bystride; | |
195 | dest_y = dest + y*rystride; | |
196 | for (n = 0; n < count; n++) | |
197 | { | |
198 | abase_n = abase + n*aystride; | |
199 | bbase_yn = bbase_y[n]; | |
200 | for (x = 0; x < xcount; x++) | |
201 | { | |
202 | dest_y[x] += abase_n[x] * bbase_yn; | |
203 | } | |
204 | } | |
205 | } | |
206 | } | |
207 | else | |
208 | { | |
209 | for (y = 0; y < ycount; y++) | |
210 | for (x = 0; x < xcount; x++) | |
211 | dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0; | |
212 | ||
213 | for (y = 0; y < ycount; y++) | |
214 | for (n = 0; n < count; n++) | |
215 | for (x = 0; x < xcount; x++) | |
216 | /* dest[x,y] += a[x,n] * b[n,y] */ | |
217 | dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride]; | |
6de9cd9a DN |
218 | } |
219 | } | |
644cb69f FXC |
220 | |
221 | #endif |