]> git.ipfire.org Git - thirdparty/gcc.git/blob - libgfortran/generated/matmul_r4.c
iresolve.c (gfc_resolve_all, [...]): Use PREFIX.
[thirdparty/gcc.git] / libgfortran / generated / matmul_r4.c
1 /* Implementation of the MATMUL intrinsic
2 Copyright 2002 Free Software Foundation, Inc.
3 Contributed by Paul Brook <paul@nowt.org>
4
5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
6
7 Libgfortran is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
11
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with libgfor; see the file COPYING.LIB. If not,
19 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 #include "config.h"
23 #include <stdlib.h>
24 #include <string.h>
25 #include <assert.h>
26 #include "libgfortran.h"
27
28 /* This is a C version of the following fortran pseudo-code. The key
29 point is the loop order -- we access all arrays column-first, which
30 improves the performance enough to boost galgel spec score by 50%.
31
32 DIMENSION A(M,COUNT), B(COUNT,N), C(M,N)
33 C = 0
34 DO J=1,N
35 DO K=1,COUNT
36 DO I=1,M
37 C(I,J) = C(I,J)+A(I,K)*B(K,J)
38 */
39
40 extern void matmul_r4 (gfc_array_r4 * retarray, gfc_array_r4 * a, gfc_array_r4 * b);
41 export_proto(matmul_r4);
42
43 void
44 matmul_r4 (gfc_array_r4 * retarray, gfc_array_r4 * a, gfc_array_r4 * b)
45 {
46 GFC_REAL_4 *abase;
47 GFC_REAL_4 *bbase;
48 GFC_REAL_4 *dest;
49
50 index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
51 index_type x, y, n, count, xcount, ycount;
52
53 assert (GFC_DESCRIPTOR_RANK (a) == 2
54 || GFC_DESCRIPTOR_RANK (b) == 2);
55
56 /* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
57
58 Either A or B (but not both) can be rank 1:
59
60 o One-dimensional argument A is implicitly treated as a row matrix
61 dimensioned [1,count], so xcount=1.
62
63 o One-dimensional argument B is implicitly treated as a column matrix
64 dimensioned [count, 1], so ycount=1.
65 */
66
67 if (retarray->data == NULL)
68 {
69 if (GFC_DESCRIPTOR_RANK (a) == 1)
70 {
71 retarray->dim[0].lbound = 0;
72 retarray->dim[0].ubound = b->dim[1].ubound - b->dim[1].lbound;
73 retarray->dim[0].stride = 1;
74 }
75 else if (GFC_DESCRIPTOR_RANK (b) == 1)
76 {
77 retarray->dim[0].lbound = 0;
78 retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
79 retarray->dim[0].stride = 1;
80 }
81 else
82 {
83 retarray->dim[0].lbound = 0;
84 retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
85 retarray->dim[0].stride = 1;
86
87 retarray->dim[1].lbound = 0;
88 retarray->dim[1].ubound = b->dim[1].ubound - b->dim[1].lbound;
89 retarray->dim[1].stride = retarray->dim[0].ubound+1;
90 }
91
92 retarray->data
93 = internal_malloc_size (sizeof (GFC_REAL_4) * size0 (retarray));
94 retarray->base = 0;
95 }
96
97 abase = a->data;
98 bbase = b->data;
99 dest = retarray->data;
100
101 if (retarray->dim[0].stride == 0)
102 retarray->dim[0].stride = 1;
103 if (a->dim[0].stride == 0)
104 a->dim[0].stride = 1;
105 if (b->dim[0].stride == 0)
106 b->dim[0].stride = 1;
107
108
109 if (GFC_DESCRIPTOR_RANK (retarray) == 1)
110 {
111 /* One-dimensional result may be addressed in the code below
112 either as a row or a column matrix. We want both cases to
113 work. */
114 rxstride = rystride = retarray->dim[0].stride;
115 }
116 else
117 {
118 rxstride = retarray->dim[0].stride;
119 rystride = retarray->dim[1].stride;
120 }
121
122
123 if (GFC_DESCRIPTOR_RANK (a) == 1)
124 {
125 /* Treat it as a a row matrix A[1,count]. */
126 axstride = a->dim[0].stride;
127 aystride = 1;
128
129 xcount = 1;
130 count = a->dim[0].ubound + 1 - a->dim[0].lbound;
131 }
132 else
133 {
134 axstride = a->dim[0].stride;
135 aystride = a->dim[1].stride;
136
137 count = a->dim[1].ubound + 1 - a->dim[1].lbound;
138 xcount = a->dim[0].ubound + 1 - a->dim[0].lbound;
139 }
140
141 assert(count == b->dim[0].ubound + 1 - b->dim[0].lbound);
142
143 if (GFC_DESCRIPTOR_RANK (b) == 1)
144 {
145 /* Treat it as a column matrix B[count,1] */
146 bxstride = b->dim[0].stride;
147
148 /* bystride should never be used for 1-dimensional b.
149 in case it is we want it to cause a segfault, rather than
150 an incorrect result. */
151 bystride = 0xDEADBEEF;
152 ycount = 1;
153 }
154 else
155 {
156 bxstride = b->dim[0].stride;
157 bystride = b->dim[1].stride;
158 ycount = b->dim[1].ubound + 1 - b->dim[1].lbound;
159 }
160
161 assert (a->base == 0);
162 assert (b->base == 0);
163 assert (retarray->base == 0);
164
165 abase = a->data;
166 bbase = b->data;
167 dest = retarray->data;
168
169 if (rxstride == 1 && axstride == 1 && bxstride == 1)
170 {
171 GFC_REAL_4 *bbase_y;
172 GFC_REAL_4 *dest_y;
173 GFC_REAL_4 *abase_n;
174 GFC_REAL_4 bbase_yn;
175
176 memset (dest, 0, (sizeof (GFC_REAL_4) * size0(retarray)));
177
178 for (y = 0; y < ycount; y++)
179 {
180 bbase_y = bbase + y*bystride;
181 dest_y = dest + y*rystride;
182 for (n = 0; n < count; n++)
183 {
184 abase_n = abase + n*aystride;
185 bbase_yn = bbase_y[n];
186 for (x = 0; x < xcount; x++)
187 {
188 dest_y[x] += abase_n[x] * bbase_yn;
189 }
190 }
191 }
192 }
193 else
194 {
195 for (y = 0; y < ycount; y++)
196 for (x = 0; x < xcount; x++)
197 dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
198
199 for (y = 0; y < ycount; y++)
200 for (n = 0; n < count; n++)
201 for (x = 0; x < xcount; x++)
202 /* dest[x,y] += a[x,n] * b[n,y] */
203 dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
204 }
205 }