]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgfortran/generated/matmul_i2.c
re PR fortran/58175 ([OOP] Incorrect warning message on scalar finalizer)
[thirdparty/gcc.git] / libgfortran / generated / matmul_i2.c
CommitLineData
567c915b 1/* Implementation of the MATMUL intrinsic
818ab71a 2 Copyright (C) 2002-2016 Free Software Foundation, Inc.
567c915b
TK
3 Contributed by Paul Brook <paul@nowt.org>
4
21d1335b 5This file is part of the GNU Fortran runtime library (libgfortran).
567c915b
TK
6
7Libgfortran is free software; you can redistribute it and/or
8modify it under the terms of the GNU General Public
9License as published by the Free Software Foundation; either
748086b7 10version 3 of the License, or (at your option) any later version.
567c915b
TK
11
12Libgfortran is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
748086b7
JJ
17Under Section 7 of GPL version 3, you are granted additional
18permissions described in the GCC Runtime Library Exception, version
193.1, as published by the Free Software Foundation.
20
21You should have received a copy of the GNU General Public License and
22a copy of the GCC Runtime Library Exception along with this program;
23see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24<http://www.gnu.org/licenses/>. */
567c915b 25
36ae8a61 26#include "libgfortran.h"
567c915b
TK
27#include <stdlib.h>
28#include <string.h>
29#include <assert.h>
36ae8a61 30
567c915b
TK
31
32#if defined (HAVE_GFC_INTEGER_2)
33
34/* Prototype for the BLAS ?gemm subroutine, a pointer to which can be
5d70ab07 35 passed to us by the front-end, in which case we call it for large
567c915b
TK
36 matrices. */
37
38typedef void (*blas_call)(const char *, const char *, const int *, const int *,
39 const int *, const GFC_INTEGER_2 *, const GFC_INTEGER_2 *,
40 const int *, const GFC_INTEGER_2 *, const int *,
41 const GFC_INTEGER_2 *, GFC_INTEGER_2 *, const int *,
42 int, int);
43
44/* The order of loops is different in the case of plain matrix
45 multiplication C=MATMUL(A,B), and in the frequent special case where
46 the argument A is the temporary result of a TRANSPOSE intrinsic:
47 C=MATMUL(TRANSPOSE(A),B). Transposed temporaries are detected by
48 looking at their strides.
49
50 The equivalent Fortran pseudo-code is:
51
52 DIMENSION A(M,COUNT), B(COUNT,N), C(M,N)
53 IF (.NOT.IS_TRANSPOSED(A)) THEN
54 C = 0
55 DO J=1,N
56 DO K=1,COUNT
57 DO I=1,M
58 C(I,J) = C(I,J)+A(I,K)*B(K,J)
59 ELSE
60 DO J=1,N
61 DO I=1,M
62 S = 0
63 DO K=1,COUNT
64 S = S+A(I,K)*B(K,J)
65 C(I,J) = S
66 ENDIF
67*/
68
69/* If try_blas is set to a nonzero value, then the matmul function will
70 see if there is a way to perform the matrix multiplication by a call
71 to the BLAS gemm function. */
72
73extern void matmul_i2 (gfc_array_i2 * const restrict retarray,
74 gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
75 int blas_limit, blas_call gemm);
76export_proto(matmul_i2);
77
78void
79matmul_i2 (gfc_array_i2 * const restrict retarray,
80 gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
81 int blas_limit, blas_call gemm)
82{
83 const GFC_INTEGER_2 * restrict abase;
84 const GFC_INTEGER_2 * restrict bbase;
85 GFC_INTEGER_2 * restrict dest;
86
87 index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
88 index_type x, y, n, count, xcount, ycount;
89
90 assert (GFC_DESCRIPTOR_RANK (a) == 2
91 || GFC_DESCRIPTOR_RANK (b) == 2);
92
93/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
94
95 Either A or B (but not both) can be rank 1:
96
97 o One-dimensional argument A is implicitly treated as a row matrix
98 dimensioned [1,count], so xcount=1.
99
100 o One-dimensional argument B is implicitly treated as a column matrix
101 dimensioned [count, 1], so ycount=1.
5d70ab07 102*/
567c915b 103
21d1335b 104 if (retarray->base_addr == NULL)
567c915b
TK
105 {
106 if (GFC_DESCRIPTOR_RANK (a) == 1)
107 {
dfb55fdc
TK
108 GFC_DIMENSION_SET(retarray->dim[0], 0,
109 GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
567c915b
TK
110 }
111 else if (GFC_DESCRIPTOR_RANK (b) == 1)
112 {
dfb55fdc
TK
113 GFC_DIMENSION_SET(retarray->dim[0], 0,
114 GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
567c915b
TK
115 }
116 else
117 {
dfb55fdc
TK
118 GFC_DIMENSION_SET(retarray->dim[0], 0,
119 GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
567c915b 120
dfb55fdc
TK
121 GFC_DIMENSION_SET(retarray->dim[1], 0,
122 GFC_DESCRIPTOR_EXTENT(b,1) - 1,
123 GFC_DESCRIPTOR_EXTENT(retarray,0));
567c915b
TK
124 }
125
21d1335b 126 retarray->base_addr
92e6f3a4 127 = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2));
567c915b
TK
128 retarray->offset = 0;
129 }
5d70ab07
JD
130 else if (unlikely (compile_options.bounds_check))
131 {
132 index_type ret_extent, arg_extent;
133
134 if (GFC_DESCRIPTOR_RANK (a) == 1)
135 {
136 arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
137 ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
138 if (arg_extent != ret_extent)
139 runtime_error ("Incorrect extent in return array in"
140 " MATMUL intrinsic: is %ld, should be %ld",
141 (long int) ret_extent, (long int) arg_extent);
142 }
143 else if (GFC_DESCRIPTOR_RANK (b) == 1)
144 {
145 arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
146 ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
147 if (arg_extent != ret_extent)
148 runtime_error ("Incorrect extent in return array in"
149 " MATMUL intrinsic: is %ld, should be %ld",
150 (long int) ret_extent, (long int) arg_extent);
151 }
152 else
153 {
154 arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
155 ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
156 if (arg_extent != ret_extent)
157 runtime_error ("Incorrect extent in return array in"
158 " MATMUL intrinsic for dimension 1:"
159 " is %ld, should be %ld",
160 (long int) ret_extent, (long int) arg_extent);
161
162 arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
163 ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
164 if (arg_extent != ret_extent)
165 runtime_error ("Incorrect extent in return array in"
166 " MATMUL intrinsic for dimension 2:"
167 " is %ld, should be %ld",
168 (long int) ret_extent, (long int) arg_extent);
169 }
170 }
567c915b
TK
171
172
173 if (GFC_DESCRIPTOR_RANK (retarray) == 1)
174 {
175 /* One-dimensional result may be addressed in the code below
176 either as a row or a column matrix. We want both cases to
177 work. */
dfb55fdc 178 rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
567c915b
TK
179 }
180 else
181 {
dfb55fdc
TK
182 rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
183 rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
567c915b
TK
184 }
185
186
187 if (GFC_DESCRIPTOR_RANK (a) == 1)
188 {
189 /* Treat it as a a row matrix A[1,count]. */
dfb55fdc 190 axstride = GFC_DESCRIPTOR_STRIDE(a,0);
567c915b
TK
191 aystride = 1;
192
193 xcount = 1;
dfb55fdc 194 count = GFC_DESCRIPTOR_EXTENT(a,0);
567c915b
TK
195 }
196 else
197 {
dfb55fdc
TK
198 axstride = GFC_DESCRIPTOR_STRIDE(a,0);
199 aystride = GFC_DESCRIPTOR_STRIDE(a,1);
567c915b 200
dfb55fdc
TK
201 count = GFC_DESCRIPTOR_EXTENT(a,1);
202 xcount = GFC_DESCRIPTOR_EXTENT(a,0);
567c915b
TK
203 }
204
dfb55fdc 205 if (count != GFC_DESCRIPTOR_EXTENT(b,0))
7edc89d4 206 {
dfb55fdc 207 if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
7edc89d4
TK
208 runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
209 }
567c915b
TK
210
211 if (GFC_DESCRIPTOR_RANK (b) == 1)
212 {
213 /* Treat it as a column matrix B[count,1] */
dfb55fdc 214 bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
567c915b
TK
215
216 /* bystride should never be used for 1-dimensional b.
217 in case it is we want it to cause a segfault, rather than
218 an incorrect result. */
219 bystride = 0xDEADBEEF;
220 ycount = 1;
221 }
222 else
223 {
dfb55fdc
TK
224 bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
225 bystride = GFC_DESCRIPTOR_STRIDE(b,1);
226 ycount = GFC_DESCRIPTOR_EXTENT(b,1);
567c915b
TK
227 }
228
21d1335b
TB
229 abase = a->base_addr;
230 bbase = b->base_addr;
231 dest = retarray->base_addr;
567c915b 232
5d70ab07 233 /* Now that everything is set up, we perform the multiplication
567c915b
TK
234 itself. */
235
236#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
5d70ab07
JD
237#define min(a,b) ((a) <= (b) ? (a) : (b))
238#define max(a,b) ((a) >= (b) ? (a) : (b))
567c915b
TK
239
240 if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
241 && (bxstride == 1 || bystride == 1)
242 && (((float) xcount) * ((float) ycount) * ((float) count)
243 > POW3(blas_limit)))
567c915b 244 {
5d70ab07
JD
245 const int m = xcount, n = ycount, k = count, ldc = rystride;
246 const GFC_INTEGER_2 one = 1, zero = 0;
247 const int lda = (axstride == 1) ? aystride : axstride,
248 ldb = (bxstride == 1) ? bystride : bxstride;
567c915b 249
5d70ab07 250 if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
567c915b 251 {
5d70ab07
JD
252 assert (gemm != NULL);
253 gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
254 &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest,
255 &ldc, 1, 1);
256 return;
567c915b 257 }
5d70ab07 258 }
567c915b 259
5d70ab07
JD
260 if (rxstride == 1 && axstride == 1 && bxstride == 1)
261 {
262 /* This block of code implements a tuned matmul, derived from
263 Superscalar GEMM-based level 3 BLAS, Beta version 0.1
264
265 Bo Kagstrom and Per Ling
266 Department of Computing Science
267 Umea University
268 S-901 87 Umea, Sweden
269
270 from netlib.org, translated to C, and modified for matmul.m4. */
271
272 const GFC_INTEGER_2 *a, *b;
273 GFC_INTEGER_2 *c;
274 const index_type m = xcount, n = ycount, k = count;
275
276 /* System generated locals */
277 index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
278 i1, i2, i3, i4, i5, i6;
279
280 /* Local variables */
281 GFC_INTEGER_2 t1[65536], /* was [256][256] */
282 f11, f12, f21, f22, f31, f32, f41, f42,
283 f13, f14, f23, f24, f33, f34, f43, f44;
284 index_type i, j, l, ii, jj, ll;
285 index_type isec, jsec, lsec, uisec, ujsec, ulsec;
286
287 a = abase;
288 b = bbase;
289 c = retarray->base_addr;
290
291 /* Parameter adjustments */
292 c_dim1 = rystride;
293 c_offset = 1 + c_dim1;
294 c -= c_offset;
295 a_dim1 = aystride;
296 a_offset = 1 + a_dim1;
297 a -= a_offset;
298 b_dim1 = bystride;
299 b_offset = 1 + b_dim1;
300 b -= b_offset;
301
302 /* Early exit if possible */
303 if (m == 0 || n == 0 || k == 0)
304 return;
305
306 /* Empty c first. */
307 for (j=1; j<=n; j++)
308 for (i=1; i<=m; i++)
309 c[i + j * c_dim1] = (GFC_INTEGER_2)0;
310
311 /* Start turning the crank. */
312 i1 = n;
313 for (jj = 1; jj <= i1; jj += 512)
567c915b 314 {
5d70ab07
JD
315 /* Computing MIN */
316 i2 = 512;
317 i3 = n - jj + 1;
318 jsec = min(i2,i3);
319 ujsec = jsec - jsec % 4;
320 i2 = k;
321 for (ll = 1; ll <= i2; ll += 256)
567c915b 322 {
5d70ab07
JD
323 /* Computing MIN */
324 i3 = 256;
325 i4 = k - ll + 1;
326 lsec = min(i3,i4);
327 ulsec = lsec - lsec % 2;
328
329 i3 = m;
330 for (ii = 1; ii <= i3; ii += 256)
567c915b 331 {
5d70ab07
JD
332 /* Computing MIN */
333 i4 = 256;
334 i5 = m - ii + 1;
335 isec = min(i4,i5);
336 uisec = isec - isec % 2;
337 i4 = ll + ulsec - 1;
338 for (l = ll; l <= i4; l += 2)
339 {
340 i5 = ii + uisec - 1;
341 for (i = ii; i <= i5; i += 2)
342 {
343 t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
344 a[i + l * a_dim1];
345 t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
346 a[i + (l + 1) * a_dim1];
347 t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
348 a[i + 1 + l * a_dim1];
349 t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
350 a[i + 1 + (l + 1) * a_dim1];
351 }
352 if (uisec < isec)
353 {
354 t1[l - ll + 1 + (isec << 8) - 257] =
355 a[ii + isec - 1 + l * a_dim1];
356 t1[l - ll + 2 + (isec << 8) - 257] =
357 a[ii + isec - 1 + (l + 1) * a_dim1];
358 }
359 }
360 if (ulsec < lsec)
361 {
362 i4 = ii + isec - 1;
363 for (i = ii; i<= i4; ++i)
364 {
365 t1[lsec + ((i - ii + 1) << 8) - 257] =
366 a[i + (ll + lsec - 1) * a_dim1];
367 }
368 }
369
370 uisec = isec - isec % 4;
371 i4 = jj + ujsec - 1;
372 for (j = jj; j <= i4; j += 4)
373 {
374 i5 = ii + uisec - 1;
375 for (i = ii; i <= i5; i += 4)
376 {
377 f11 = c[i + j * c_dim1];
378 f21 = c[i + 1 + j * c_dim1];
379 f12 = c[i + (j + 1) * c_dim1];
380 f22 = c[i + 1 + (j + 1) * c_dim1];
381 f13 = c[i + (j + 2) * c_dim1];
382 f23 = c[i + 1 + (j + 2) * c_dim1];
383 f14 = c[i + (j + 3) * c_dim1];
384 f24 = c[i + 1 + (j + 3) * c_dim1];
385 f31 = c[i + 2 + j * c_dim1];
386 f41 = c[i + 3 + j * c_dim1];
387 f32 = c[i + 2 + (j + 1) * c_dim1];
388 f42 = c[i + 3 + (j + 1) * c_dim1];
389 f33 = c[i + 2 + (j + 2) * c_dim1];
390 f43 = c[i + 3 + (j + 2) * c_dim1];
391 f34 = c[i + 2 + (j + 3) * c_dim1];
392 f44 = c[i + 3 + (j + 3) * c_dim1];
393 i6 = ll + lsec - 1;
394 for (l = ll; l <= i6; ++l)
395 {
396 f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
397 * b[l + j * b_dim1];
398 f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
399 * b[l + j * b_dim1];
400 f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
401 * b[l + (j + 1) * b_dim1];
402 f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
403 * b[l + (j + 1) * b_dim1];
404 f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
405 * b[l + (j + 2) * b_dim1];
406 f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
407 * b[l + (j + 2) * b_dim1];
408 f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
409 * b[l + (j + 3) * b_dim1];
410 f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
411 * b[l + (j + 3) * b_dim1];
412 f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
413 * b[l + j * b_dim1];
414 f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
415 * b[l + j * b_dim1];
416 f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
417 * b[l + (j + 1) * b_dim1];
418 f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
419 * b[l + (j + 1) * b_dim1];
420 f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
421 * b[l + (j + 2) * b_dim1];
422 f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
423 * b[l + (j + 2) * b_dim1];
424 f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
425 * b[l + (j + 3) * b_dim1];
426 f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
427 * b[l + (j + 3) * b_dim1];
428 }
429 c[i + j * c_dim1] = f11;
430 c[i + 1 + j * c_dim1] = f21;
431 c[i + (j + 1) * c_dim1] = f12;
432 c[i + 1 + (j + 1) * c_dim1] = f22;
433 c[i + (j + 2) * c_dim1] = f13;
434 c[i + 1 + (j + 2) * c_dim1] = f23;
435 c[i + (j + 3) * c_dim1] = f14;
436 c[i + 1 + (j + 3) * c_dim1] = f24;
437 c[i + 2 + j * c_dim1] = f31;
438 c[i + 3 + j * c_dim1] = f41;
439 c[i + 2 + (j + 1) * c_dim1] = f32;
440 c[i + 3 + (j + 1) * c_dim1] = f42;
441 c[i + 2 + (j + 2) * c_dim1] = f33;
442 c[i + 3 + (j + 2) * c_dim1] = f43;
443 c[i + 2 + (j + 3) * c_dim1] = f34;
444 c[i + 3 + (j + 3) * c_dim1] = f44;
445 }
446 if (uisec < isec)
447 {
448 i5 = ii + isec - 1;
449 for (i = ii + uisec; i <= i5; ++i)
450 {
451 f11 = c[i + j * c_dim1];
452 f12 = c[i + (j + 1) * c_dim1];
453 f13 = c[i + (j + 2) * c_dim1];
454 f14 = c[i + (j + 3) * c_dim1];
455 i6 = ll + lsec - 1;
456 for (l = ll; l <= i6; ++l)
457 {
458 f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
459 257] * b[l + j * b_dim1];
460 f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
461 257] * b[l + (j + 1) * b_dim1];
462 f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
463 257] * b[l + (j + 2) * b_dim1];
464 f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
465 257] * b[l + (j + 3) * b_dim1];
466 }
467 c[i + j * c_dim1] = f11;
468 c[i + (j + 1) * c_dim1] = f12;
469 c[i + (j + 2) * c_dim1] = f13;
470 c[i + (j + 3) * c_dim1] = f14;
471 }
472 }
473 }
474 if (ujsec < jsec)
475 {
476 i4 = jj + jsec - 1;
477 for (j = jj + ujsec; j <= i4; ++j)
478 {
479 i5 = ii + uisec - 1;
480 for (i = ii; i <= i5; i += 4)
481 {
482 f11 = c[i + j * c_dim1];
483 f21 = c[i + 1 + j * c_dim1];
484 f31 = c[i + 2 + j * c_dim1];
485 f41 = c[i + 3 + j * c_dim1];
486 i6 = ll + lsec - 1;
487 for (l = ll; l <= i6; ++l)
488 {
489 f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
490 257] * b[l + j * b_dim1];
491 f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
492 257] * b[l + j * b_dim1];
493 f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
494 257] * b[l + j * b_dim1];
495 f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
496 257] * b[l + j * b_dim1];
497 }
498 c[i + j * c_dim1] = f11;
499 c[i + 1 + j * c_dim1] = f21;
500 c[i + 2 + j * c_dim1] = f31;
501 c[i + 3 + j * c_dim1] = f41;
502 }
503 i5 = ii + isec - 1;
504 for (i = ii + uisec; i <= i5; ++i)
505 {
506 f11 = c[i + j * c_dim1];
507 i6 = ll + lsec - 1;
508 for (l = ll; l <= i6; ++l)
509 {
510 f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
511 257] * b[l + j * b_dim1];
512 }
513 c[i + j * c_dim1] = f11;
514 }
515 }
516 }
567c915b
TK
517 }
518 }
519 }
5d70ab07 520 return;
567c915b
TK
521 }
522 else if (rxstride == 1 && aystride == 1 && bxstride == 1)
523 {
524 if (GFC_DESCRIPTOR_RANK (a) != 1)
525 {
526 const GFC_INTEGER_2 *restrict abase_x;
527 const GFC_INTEGER_2 *restrict bbase_y;
528 GFC_INTEGER_2 *restrict dest_y;
529 GFC_INTEGER_2 s;
530
531 for (y = 0; y < ycount; y++)
532 {
533 bbase_y = &bbase[y*bystride];
534 dest_y = &dest[y*rystride];
535 for (x = 0; x < xcount; x++)
536 {
537 abase_x = &abase[x*axstride];
538 s = (GFC_INTEGER_2) 0;
539 for (n = 0; n < count; n++)
540 s += abase_x[n] * bbase_y[n];
541 dest_y[x] = s;
542 }
543 }
544 }
545 else
546 {
547 const GFC_INTEGER_2 *restrict bbase_y;
548 GFC_INTEGER_2 s;
549
550 for (y = 0; y < ycount; y++)
551 {
552 bbase_y = &bbase[y*bystride];
553 s = (GFC_INTEGER_2) 0;
554 for (n = 0; n < count; n++)
555 s += abase[n*axstride] * bbase_y[n];
556 dest[y*rystride] = s;
557 }
558 }
559 }
560 else if (axstride < aystride)
561 {
562 for (y = 0; y < ycount; y++)
563 for (x = 0; x < xcount; x++)
564 dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
565
566 for (y = 0; y < ycount; y++)
567 for (n = 0; n < count; n++)
568 for (x = 0; x < xcount; x++)
569 /* dest[x,y] += a[x,n] * b[n,y] */
5d70ab07
JD
570 dest[x*rxstride + y*rystride] +=
571 abase[x*axstride + n*aystride] *
572 bbase[n*bxstride + y*bystride];
567c915b
TK
573 }
574 else if (GFC_DESCRIPTOR_RANK (a) == 1)
575 {
576 const GFC_INTEGER_2 *restrict bbase_y;
577 GFC_INTEGER_2 s;
578
579 for (y = 0; y < ycount; y++)
580 {
581 bbase_y = &bbase[y*bystride];
582 s = (GFC_INTEGER_2) 0;
583 for (n = 0; n < count; n++)
584 s += abase[n*axstride] * bbase_y[n*bxstride];
585 dest[y*rxstride] = s;
586 }
587 }
588 else
589 {
590 const GFC_INTEGER_2 *restrict abase_x;
591 const GFC_INTEGER_2 *restrict bbase_y;
592 GFC_INTEGER_2 *restrict dest_y;
593 GFC_INTEGER_2 s;
594
595 for (y = 0; y < ycount; y++)
596 {
597 bbase_y = &bbase[y*bystride];
598 dest_y = &dest[y*rystride];
599 for (x = 0; x < xcount; x++)
600 {
601 abase_x = &abase[x*axstride];
602 s = (GFC_INTEGER_2) 0;
603 for (n = 0; n < count; n++)
604 s += abase_x[n*aystride] * bbase_y[n*bxstride];
605 dest_y[x*rxstride] = s;
606 }
607 }
608 }
609}
567c915b 610#endif