]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgfortran/generated/matmul_r4.c
re PR libfortran/19308 (I/O library should support more real and integer kinds)
[thirdparty/gcc.git] / libgfortran / generated / matmul_r4.c
CommitLineData
6de9cd9a 1/* Implementation of the MATMUL intrinsic
4b6903ec 2 Copyright 2002, 2005 Free Software Foundation, Inc.
6de9cd9a
DN
3 Contributed by Paul Brook <paul@nowt.org>
4
883c9d4d 5This file is part of the GNU Fortran 95 runtime library (libgfortran).
6de9cd9a
DN
6
7Libgfortran is free software; you can redistribute it and/or
57dea9f6 8modify it under the terms of the GNU General Public
6de9cd9a 9License as published by the Free Software Foundation; either
57dea9f6
TM
10version 2 of the License, or (at your option) any later version.
11
12In addition to the permissions in the GNU General Public License, the
13Free Software Foundation gives you unlimited permission to link the
14compiled version of this file into combinations with other programs,
15and to distribute those combinations without any restriction coming
16from the use of this file. (The General Public License restrictions
17do apply in other respects; for example, they cover modification of
18the file, and distribution when not linked into a combine
19executable.)
6de9cd9a
DN
20
21Libgfortran is distributed in the hope that it will be useful,
22but WITHOUT ANY WARRANTY; without even the implied warranty of
23MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
57dea9f6 24GNU General Public License for more details.
6de9cd9a 25
57dea9f6
TM
26You should have received a copy of the GNU General Public
27License along with libgfortran; see the file COPYING. If not,
fe2ae685
KC
28write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
29Boston, MA 02110-1301, USA. */
6de9cd9a
DN
30
31#include "config.h"
32#include <stdlib.h>
410d3bba 33#include <string.h>
6de9cd9a
DN
34#include <assert.h>
35#include "libgfortran.h"
36
644cb69f
FXC
37#if defined (HAVE_GFC_REAL_4)
38
410d3bba
VL
39/* This is a C version of the following fortran pseudo-code. The key
40 point is the loop order -- we access all arrays column-first, which
41 improves the performance enough to boost galgel spec score by 50%.
42
43 DIMENSION A(M,COUNT), B(COUNT,N), C(M,N)
44 C = 0
45 DO J=1,N
46 DO K=1,COUNT
47 DO I=1,M
48 C(I,J) = C(I,J)+A(I,K)*B(K,J)
49*/
50
7f68c75f
RH
51extern void matmul_r4 (gfc_array_r4 * retarray, gfc_array_r4 * a, gfc_array_r4 * b);
52export_proto(matmul_r4);
7d7b8bfe 53
6de9cd9a 54void
7f68c75f 55matmul_r4 (gfc_array_r4 * retarray, gfc_array_r4 * a, gfc_array_r4 * b)
6de9cd9a
DN
56{
57 GFC_REAL_4 *abase;
58 GFC_REAL_4 *bbase;
59 GFC_REAL_4 *dest;
410d3bba
VL
60
61 index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
62 index_type x, y, n, count, xcount, ycount;
6de9cd9a
DN
63
64 assert (GFC_DESCRIPTOR_RANK (a) == 2
65 || GFC_DESCRIPTOR_RANK (b) == 2);
883c9d4d 66
410d3bba
VL
67/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
68
69 Either A or B (but not both) can be rank 1:
70
71 o One-dimensional argument A is implicitly treated as a row matrix
72 dimensioned [1,count], so xcount=1.
73
74 o One-dimensional argument B is implicitly treated as a column matrix
75 dimensioned [count, 1], so ycount=1.
76 */
77
883c9d4d
VL
78 if (retarray->data == NULL)
79 {
80 if (GFC_DESCRIPTOR_RANK (a) == 1)
81 {
82 retarray->dim[0].lbound = 0;
83 retarray->dim[0].ubound = b->dim[1].ubound - b->dim[1].lbound;
84 retarray->dim[0].stride = 1;
85 }
86 else if (GFC_DESCRIPTOR_RANK (b) == 1)
87 {
88 retarray->dim[0].lbound = 0;
89 retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
90 retarray->dim[0].stride = 1;
91 }
92 else
93 {
94 retarray->dim[0].lbound = 0;
95 retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
96 retarray->dim[0].stride = 1;
420aa7b8 97
883c9d4d
VL
98 retarray->dim[1].lbound = 0;
99 retarray->dim[1].ubound = b->dim[1].ubound - b->dim[1].lbound;
100 retarray->dim[1].stride = retarray->dim[0].ubound+1;
101 }
420aa7b8 102
07d3cebe 103 retarray->data
4b6903ec 104 = internal_malloc_size (sizeof (GFC_REAL_4) * size0 ((array_t *) retarray));
efd4dc1a 105 retarray->offset = 0;
883c9d4d
VL
106 }
107
6de9cd9a
DN
108 abase = a->data;
109 bbase = b->data;
110 dest = retarray->data;
111
112 if (retarray->dim[0].stride == 0)
113 retarray->dim[0].stride = 1;
114 if (a->dim[0].stride == 0)
115 a->dim[0].stride = 1;
116 if (b->dim[0].stride == 0)
117 b->dim[0].stride = 1;
118
119
120 if (GFC_DESCRIPTOR_RANK (retarray) == 1)
121 {
410d3bba
VL
122 /* One-dimensional result may be addressed in the code below
123 either as a row or a column matrix. We want both cases to
124 work. */
125 rxstride = rystride = retarray->dim[0].stride;
6de9cd9a
DN
126 }
127 else
128 {
129 rxstride = retarray->dim[0].stride;
130 rystride = retarray->dim[1].stride;
131 }
132
410d3bba 133
6de9cd9a
DN
134 if (GFC_DESCRIPTOR_RANK (a) == 1)
135 {
410d3bba
VL
136 /* Treat it as a a row matrix A[1,count]. */
137 axstride = a->dim[0].stride;
138 aystride = 1;
139
6de9cd9a 140 xcount = 1;
410d3bba 141 count = a->dim[0].ubound + 1 - a->dim[0].lbound;
6de9cd9a
DN
142 }
143 else
144 {
410d3bba
VL
145 axstride = a->dim[0].stride;
146 aystride = a->dim[1].stride;
147
6de9cd9a 148 count = a->dim[1].ubound + 1 - a->dim[1].lbound;
6de9cd9a
DN
149 xcount = a->dim[0].ubound + 1 - a->dim[0].lbound;
150 }
410d3bba
VL
151
152 assert(count == b->dim[0].ubound + 1 - b->dim[0].lbound);
153
6de9cd9a
DN
154 if (GFC_DESCRIPTOR_RANK (b) == 1)
155 {
410d3bba
VL
156 /* Treat it as a column matrix B[count,1] */
157 bxstride = b->dim[0].stride;
158
159 /* bystride should never be used for 1-dimensional b.
160 in case it is we want it to cause a segfault, rather than
161 an incorrect result. */
420aa7b8 162 bystride = 0xDEADBEEF;
6de9cd9a
DN
163 ycount = 1;
164 }
165 else
166 {
410d3bba
VL
167 bxstride = b->dim[0].stride;
168 bystride = b->dim[1].stride;
6de9cd9a
DN
169 ycount = b->dim[1].ubound + 1 - b->dim[1].lbound;
170 }
171
410d3bba
VL
172 abase = a->data;
173 bbase = b->data;
174 dest = retarray->data;
175
176 if (rxstride == 1 && axstride == 1 && bxstride == 1)
6de9cd9a 177 {
410d3bba
VL
178 GFC_REAL_4 *bbase_y;
179 GFC_REAL_4 *dest_y;
180 GFC_REAL_4 *abase_n;
181 GFC_REAL_4 bbase_yn;
182
ae740cce
TK
183 if (rystride == ycount)
184 memset (dest, 0, (sizeof (GFC_REAL_4) * size0((array_t *) retarray)));
185 else
186 {
187 for (y = 0; y < ycount; y++)
188 for (x = 0; x < xcount; x++)
189 dest[x + y*rystride] = (GFC_REAL_4)0;
190 }
410d3bba
VL
191
192 for (y = 0; y < ycount; y++)
193 {
194 bbase_y = bbase + y*bystride;
195 dest_y = dest + y*rystride;
196 for (n = 0; n < count; n++)
197 {
198 abase_n = abase + n*aystride;
199 bbase_yn = bbase_y[n];
200 for (x = 0; x < xcount; x++)
201 {
202 dest_y[x] += abase_n[x] * bbase_yn;
203 }
204 }
205 }
206 }
207 else
208 {
209 for (y = 0; y < ycount; y++)
210 for (x = 0; x < xcount; x++)
211 dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
212
213 for (y = 0; y < ycount; y++)
214 for (n = 0; n < count; n++)
215 for (x = 0; x < xcount; x++)
216 /* dest[x,y] += a[x,n] * b[n,y] */
217 dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
6de9cd9a
DN
218 }
219}
644cb69f
FXC
220
221#endif