/* Implementation of the MATMUL intrinsic
- Copyright (C) 2002-2020 Free Software Foundation, Inc.
+ Copyright (C) 2002-2023 Free Software Foundation, Inc.
Contributed by Paul Brook <paul@nowt.org>
This file is part of the GNU Fortran runtime library (libgfortran).
}
}
- if (rxstride == 1 && axstride == 1 && bxstride == 1)
+ if (rxstride == 1 && axstride == 1 && bxstride == 1
+ && GFC_DESCRIPTOR_RANK (b) != 1)
{
/* This block of code implements a tuned matmul, derived from
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_4 *restrict bbase_y;
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_4 *restrict abase_x;
}
}
- if (rxstride == 1 && axstride == 1 && bxstride == 1)
+ if (rxstride == 1 && axstride == 1 && bxstride == 1
+ && GFC_DESCRIPTOR_RANK (b) != 1)
{
/* This block of code implements a tuned matmul, derived from
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_4 *restrict bbase_y;
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_4 *restrict abase_x;
}
}
- if (rxstride == 1 && axstride == 1 && bxstride == 1)
+ if (rxstride == 1 && axstride == 1 && bxstride == 1
+ && GFC_DESCRIPTOR_RANK (b) != 1)
{
/* This block of code implements a tuned matmul, derived from
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_4 *restrict bbase_y;
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_4 *restrict abase_x;
}
}
- if (rxstride == 1 && axstride == 1 && bxstride == 1)
+ if (rxstride == 1 && axstride == 1 && bxstride == 1
+ && GFC_DESCRIPTOR_RANK (b) != 1)
{
/* This block of code implements a tuned matmul, derived from
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_4 *restrict bbase_y;
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_4 *restrict abase_x;
}
}
- if (rxstride == 1 && axstride == 1 && bxstride == 1)
+ if (rxstride == 1 && axstride == 1 && bxstride == 1
+ && GFC_DESCRIPTOR_RANK (b) != 1)
{
/* This block of code implements a tuned matmul, derived from
Superscalar GEMM-based level 3 BLAS, Beta version 0.1
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_4 *restrict bbase_y;
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_4 *restrict abase_x;