]>
git.ipfire.org Git - thirdparty/gcc.git/blob - libgfortran/generated/matmul_r4.c
1 /* Implementation of the MATMUL intrinsic
2 Copyright 2002 Free Software Foundation, Inc.
3 Contributed by Paul Brook <paul@nowt.org>
5 This file is part of the GNU Fortran 95 runtime library (libgfortran).
7 Libgfortran is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
12 Libgfortran is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with libgfor; see the file COPYING.LIB. If not,
19 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
26 #include "libgfortran.h"
28 /* This is a C version of the following fortran pseudo-code. The key
29 point is the loop order -- we access all arrays column-first, which
30 improves the performance enough to boost galgel spec score by 50%.
32 DIMENSION A(M,COUNT), B(COUNT,N), C(M,N)
37 C(I,J) = C(I,J)+A(I,K)*B(K,J)
40 extern void matmul_r4 (gfc_array_r4
* retarray
, gfc_array_r4
* a
, gfc_array_r4
* b
);
41 export_proto(matmul_r4
);
44 matmul_r4 (gfc_array_r4
* retarray
, gfc_array_r4
* a
, gfc_array_r4
* b
)
50 index_type rxstride
, rystride
, axstride
, aystride
, bxstride
, bystride
;
51 index_type x
, y
, n
, count
, xcount
, ycount
;
53 assert (GFC_DESCRIPTOR_RANK (a
) == 2
54 || GFC_DESCRIPTOR_RANK (b
) == 2);
56 /* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
58 Either A or B (but not both) can be rank 1:
60 o One-dimensional argument A is implicitly treated as a row matrix
61 dimensioned [1,count], so xcount=1.
63 o One-dimensional argument B is implicitly treated as a column matrix
64 dimensioned [count, 1], so ycount=1.
67 if (retarray
->data
== NULL
)
69 if (GFC_DESCRIPTOR_RANK (a
) == 1)
71 retarray
->dim
[0].lbound
= 0;
72 retarray
->dim
[0].ubound
= b
->dim
[1].ubound
- b
->dim
[1].lbound
;
73 retarray
->dim
[0].stride
= 1;
75 else if (GFC_DESCRIPTOR_RANK (b
) == 1)
77 retarray
->dim
[0].lbound
= 0;
78 retarray
->dim
[0].ubound
= a
->dim
[0].ubound
- a
->dim
[0].lbound
;
79 retarray
->dim
[0].stride
= 1;
83 retarray
->dim
[0].lbound
= 0;
84 retarray
->dim
[0].ubound
= a
->dim
[0].ubound
- a
->dim
[0].lbound
;
85 retarray
->dim
[0].stride
= 1;
87 retarray
->dim
[1].lbound
= 0;
88 retarray
->dim
[1].ubound
= b
->dim
[1].ubound
- b
->dim
[1].lbound
;
89 retarray
->dim
[1].stride
= retarray
->dim
[0].ubound
+1;
93 = internal_malloc_size (sizeof (GFC_REAL_4
) * size0 (retarray
));
99 dest
= retarray
->data
;
101 if (retarray
->dim
[0].stride
== 0)
102 retarray
->dim
[0].stride
= 1;
103 if (a
->dim
[0].stride
== 0)
104 a
->dim
[0].stride
= 1;
105 if (b
->dim
[0].stride
== 0)
106 b
->dim
[0].stride
= 1;
109 if (GFC_DESCRIPTOR_RANK (retarray
) == 1)
111 /* One-dimensional result may be addressed in the code below
112 either as a row or a column matrix. We want both cases to
114 rxstride
= rystride
= retarray
->dim
[0].stride
;
118 rxstride
= retarray
->dim
[0].stride
;
119 rystride
= retarray
->dim
[1].stride
;
123 if (GFC_DESCRIPTOR_RANK (a
) == 1)
125 /* Treat it as a a row matrix A[1,count]. */
126 axstride
= a
->dim
[0].stride
;
130 count
= a
->dim
[0].ubound
+ 1 - a
->dim
[0].lbound
;
134 axstride
= a
->dim
[0].stride
;
135 aystride
= a
->dim
[1].stride
;
137 count
= a
->dim
[1].ubound
+ 1 - a
->dim
[1].lbound
;
138 xcount
= a
->dim
[0].ubound
+ 1 - a
->dim
[0].lbound
;
141 assert(count
== b
->dim
[0].ubound
+ 1 - b
->dim
[0].lbound
);
143 if (GFC_DESCRIPTOR_RANK (b
) == 1)
145 /* Treat it as a column matrix B[count,1] */
146 bxstride
= b
->dim
[0].stride
;
148 /* bystride should never be used for 1-dimensional b.
149 in case it is we want it to cause a segfault, rather than
150 an incorrect result. */
151 bystride
= 0xDEADBEEF;
156 bxstride
= b
->dim
[0].stride
;
157 bystride
= b
->dim
[1].stride
;
158 ycount
= b
->dim
[1].ubound
+ 1 - b
->dim
[1].lbound
;
161 assert (a
->base
== 0);
162 assert (b
->base
== 0);
163 assert (retarray
->base
== 0);
167 dest
= retarray
->data
;
169 if (rxstride
== 1 && axstride
== 1 && bxstride
== 1)
176 memset (dest
, 0, (sizeof (GFC_REAL_4
) * size0(retarray
)));
178 for (y
= 0; y
< ycount
; y
++)
180 bbase_y
= bbase
+ y
*bystride
;
181 dest_y
= dest
+ y
*rystride
;
182 for (n
= 0; n
< count
; n
++)
184 abase_n
= abase
+ n
*aystride
;
185 bbase_yn
= bbase_y
[n
];
186 for (x
= 0; x
< xcount
; x
++)
188 dest_y
[x
] += abase_n
[x
] * bbase_yn
;
195 for (y
= 0; y
< ycount
; y
++)
196 for (x
= 0; x
< xcount
; x
++)
197 dest
[x
*rxstride
+ y
*rystride
] = (GFC_REAL_4
)0;
199 for (y
= 0; y
< ycount
; y
++)
200 for (n
= 0; n
< count
; n
++)
201 for (x
= 0; x
< xcount
; x
++)
202 /* dest[x,y] += a[x,n] * b[n,y] */
203 dest
[x
*rxstride
+ y
*rystride
] += abase
[x
*axstride
+ n
*aystride
] * bbase
[n
*bxstride
+ y
*bystride
];