/* Implementation of the MATMUL intrinsic
- Copyright (C) 2002-2017 Free Software Foundation, Inc.
+ Copyright (C) 2002-2020 Free Software Foundation, Inc.
Contributed by Paul Brook <paul@nowt.org>
This file is part of the GNU Fortran runtime library (libgfortran).
arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
if (arg_extent != ret_extent)
- runtime_error ("Incorrect extent in return array in"
- " MATMUL intrinsic: is %ld, should be %ld",
+ runtime_error ("Array bound mismatch for dimension 1 of "
+ "array (%ld/%ld) ",
(long int) ret_extent, (long int) arg_extent);
}
else if (GFC_DESCRIPTOR_RANK (b) == 1)
arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
if (arg_extent != ret_extent)
- runtime_error ("Incorrect extent in return array in"
- " MATMUL intrinsic: is %ld, should be %ld",
+ runtime_error ("Array bound mismatch for dimension 1 of "
+ "array (%ld/%ld) ",
(long int) ret_extent, (long int) arg_extent);
}
else
arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
if (arg_extent != ret_extent)
- runtime_error ("Incorrect extent in return array in"
- " MATMUL intrinsic for dimension 1:"
- " is %ld, should be %ld",
+ runtime_error ("Array bound mismatch for dimension 1 of "
+ "array (%ld/%ld) ",
(long int) ret_extent, (long int) arg_extent);
arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
if (arg_extent != ret_extent)
- runtime_error ("Incorrect extent in return array in"
- " MATMUL intrinsic for dimension 2:"
- " is %ld, should be %ld",
+ runtime_error ("Array bound mismatch for dimension 2 of "
+ "array (%ld/%ld) ",
(long int) ret_extent, (long int) arg_extent);
}
}
if (count != GFC_DESCRIPTOR_EXTENT(b,0))
{
if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
- runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+ runtime_error ("Incorrect extent in argument B in MATMUL intrinsic "
+ "in dimension 1: is %ld, should be %ld",
+ (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count);
}
if (GFC_DESCRIPTOR_RANK (b) == 1)
bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
/* bystride should never be used for 1-dimensional b.
- in case it is we want it to cause a segfault, rather than
- an incorrect result. */
- bystride = 0xDEADBEEF;
+ The value is only used for calculation of the
+ memory by the buffer. */
+ bystride = 256;
ycount = 1;
}
else
if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
{
assert (gemm != NULL);
- gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+ const char *transa, *transb;
+ if (try_blas & 2)
+ transa = "C";
+ else
+ transa = axstride == 1 ? "N" : "T";
+
+ if (try_blas & 4)
+ transb = "C";
+ else
+ transb = bxstride == 1 ? "N" : "T";
+
+ gemm (transa, transb , &m,
&n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest,
&ldc, 1, 1);
return;
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+ GFC_COMPLEX_8 *t1;
a = abase;
b = bbase;
b_offset = 1 + b_dim1;
b -= b_offset;
+ /* Empty c first. */
+ for (j=1; j<=n; j++)
+ for (i=1; i<=m; i++)
+ c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1-1) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wvla"
- GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */
-#pragma GCC diagnostic pop
-
- /* Empty c first. */
- for (j=1; j<=n; j++)
- for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
/* Start turning the crank. */
i1 = n;
}
}
}
+ free(t1);
return;
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
if (arg_extent != ret_extent)
- runtime_error ("Incorrect extent in return array in"
- " MATMUL intrinsic: is %ld, should be %ld",
+ runtime_error ("Array bound mismatch for dimension 1 of "
+ "array (%ld/%ld) ",
(long int) ret_extent, (long int) arg_extent);
}
else if (GFC_DESCRIPTOR_RANK (b) == 1)
arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
if (arg_extent != ret_extent)
- runtime_error ("Incorrect extent in return array in"
- " MATMUL intrinsic: is %ld, should be %ld",
+ runtime_error ("Array bound mismatch for dimension 1 of "
+ "array (%ld/%ld) ",
(long int) ret_extent, (long int) arg_extent);
}
else
arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
if (arg_extent != ret_extent)
- runtime_error ("Incorrect extent in return array in"
- " MATMUL intrinsic for dimension 1:"
- " is %ld, should be %ld",
+ runtime_error ("Array bound mismatch for dimension 1 of "
+ "array (%ld/%ld) ",
(long int) ret_extent, (long int) arg_extent);
arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
if (arg_extent != ret_extent)
- runtime_error ("Incorrect extent in return array in"
- " MATMUL intrinsic for dimension 2:"
- " is %ld, should be %ld",
+ runtime_error ("Array bound mismatch for dimension 2 of "
+ "array (%ld/%ld) ",
(long int) ret_extent, (long int) arg_extent);
}
}
if (count != GFC_DESCRIPTOR_EXTENT(b,0))
{
if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
- runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+ runtime_error ("Incorrect extent in argument B in MATMUL intrinsic "
+ "in dimension 1: is %ld, should be %ld",
+ (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count);
}
if (GFC_DESCRIPTOR_RANK (b) == 1)
bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
/* bystride should never be used for 1-dimensional b.
- in case it is we want it to cause a segfault, rather than
- an incorrect result. */
- bystride = 0xDEADBEEF;
+ The value is only used for calculation of the
+ memory by the buffer. */
+ bystride = 256;
ycount = 1;
}
else
if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
{
assert (gemm != NULL);
- gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+ const char *transa, *transb;
+ if (try_blas & 2)
+ transa = "C";
+ else
+ transa = axstride == 1 ? "N" : "T";
+
+ if (try_blas & 4)
+ transb = "C";
+ else
+ transb = bxstride == 1 ? "N" : "T";
+
+ gemm (transa, transb , &m,
&n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest,
&ldc, 1, 1);
return;
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+ GFC_COMPLEX_8 *t1;
a = abase;
b = bbase;
b_offset = 1 + b_dim1;
b -= b_offset;
+ /* Empty c first. */
+ for (j=1; j<=n; j++)
+ for (i=1; i<=m; i++)
+ c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1-1) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wvla"
- GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */
-#pragma GCC diagnostic pop
-
- /* Empty c first. */
- for (j=1; j<=n; j++)
- for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
/* Start turning the crank. */
i1 = n;
}
}
}
+ free(t1);
return;
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
if (arg_extent != ret_extent)
- runtime_error ("Incorrect extent in return array in"
- " MATMUL intrinsic: is %ld, should be %ld",
+ runtime_error ("Array bound mismatch for dimension 1 of "
+ "array (%ld/%ld) ",
(long int) ret_extent, (long int) arg_extent);
}
else if (GFC_DESCRIPTOR_RANK (b) == 1)
arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
if (arg_extent != ret_extent)
- runtime_error ("Incorrect extent in return array in"
- " MATMUL intrinsic: is %ld, should be %ld",
+ runtime_error ("Array bound mismatch for dimension 1 of "
+ "array (%ld/%ld) ",
(long int) ret_extent, (long int) arg_extent);
}
else
arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
if (arg_extent != ret_extent)
- runtime_error ("Incorrect extent in return array in"
- " MATMUL intrinsic for dimension 1:"
- " is %ld, should be %ld",
+ runtime_error ("Array bound mismatch for dimension 1 of "
+ "array (%ld/%ld) ",
(long int) ret_extent, (long int) arg_extent);
arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
if (arg_extent != ret_extent)
- runtime_error ("Incorrect extent in return array in"
- " MATMUL intrinsic for dimension 2:"
- " is %ld, should be %ld",
+ runtime_error ("Array bound mismatch for dimension 2 of "
+ "array (%ld/%ld) ",
(long int) ret_extent, (long int) arg_extent);
}
}
if (count != GFC_DESCRIPTOR_EXTENT(b,0))
{
if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
- runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+ runtime_error ("Incorrect extent in argument B in MATMUL intrinsic "
+ "in dimension 1: is %ld, should be %ld",
+ (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count);
}
if (GFC_DESCRIPTOR_RANK (b) == 1)
bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
/* bystride should never be used for 1-dimensional b.
- in case it is we want it to cause a segfault, rather than
- an incorrect result. */
- bystride = 0xDEADBEEF;
+ The value is only used for calculation of the
+ memory by the buffer. */
+ bystride = 256;
ycount = 1;
}
else
if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
{
assert (gemm != NULL);
- gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+ const char *transa, *transb;
+ if (try_blas & 2)
+ transa = "C";
+ else
+ transa = axstride == 1 ? "N" : "T";
+
+ if (try_blas & 4)
+ transb = "C";
+ else
+ transb = bxstride == 1 ? "N" : "T";
+
+ gemm (transa, transb , &m,
&n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest,
&ldc, 1, 1);
return;
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+ GFC_COMPLEX_8 *t1;
a = abase;
b = bbase;
b_offset = 1 + b_dim1;
b -= b_offset;
+ /* Empty c first. */
+ for (j=1; j<=n; j++)
+ for (i=1; i<=m; i++)
+ c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1-1) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wvla"
- GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */
-#pragma GCC diagnostic pop
-
- /* Empty c first. */
- for (j=1; j<=n; j++)
- for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
/* Start turning the crank. */
i1 = n;
}
}
}
+ free(t1);
return;
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
#endif /* HAVE_AVX512F */
+/* AMD-specifix funtions with AVX128 and FMA3/FMA4. */
+
+#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
+void
+matmul_c8_avx128_fma3 (gfc_array_c8 * const restrict retarray,
+ gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
+ int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
+internal_proto(matmul_c8_avx128_fma3);
+#endif
+
+#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
+void
+matmul_c8_avx128_fma4 (gfc_array_c8 * const restrict retarray,
+ gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
+ int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
+internal_proto(matmul_c8_avx128_fma4);
+#endif
+
/* Function to fall back to if there is no special processor-specific version. */
static void
matmul_c8_vanilla (gfc_array_c8 * const restrict retarray,
arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
if (arg_extent != ret_extent)
- runtime_error ("Incorrect extent in return array in"
- " MATMUL intrinsic: is %ld, should be %ld",
+ runtime_error ("Array bound mismatch for dimension 1 of "
+ "array (%ld/%ld) ",
(long int) ret_extent, (long int) arg_extent);
}
else if (GFC_DESCRIPTOR_RANK (b) == 1)
arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
if (arg_extent != ret_extent)
- runtime_error ("Incorrect extent in return array in"
- " MATMUL intrinsic: is %ld, should be %ld",
+ runtime_error ("Array bound mismatch for dimension 1 of "
+ "array (%ld/%ld) ",
(long int) ret_extent, (long int) arg_extent);
}
else
arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
if (arg_extent != ret_extent)
- runtime_error ("Incorrect extent in return array in"
- " MATMUL intrinsic for dimension 1:"
- " is %ld, should be %ld",
+ runtime_error ("Array bound mismatch for dimension 1 of "
+ "array (%ld/%ld) ",
(long int) ret_extent, (long int) arg_extent);
arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
if (arg_extent != ret_extent)
- runtime_error ("Incorrect extent in return array in"
- " MATMUL intrinsic for dimension 2:"
- " is %ld, should be %ld",
+ runtime_error ("Array bound mismatch for dimension 2 of "
+ "array (%ld/%ld) ",
(long int) ret_extent, (long int) arg_extent);
}
}
if (count != GFC_DESCRIPTOR_EXTENT(b,0))
{
if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
- runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+ runtime_error ("Incorrect extent in argument B in MATMUL intrinsic "
+ "in dimension 1: is %ld, should be %ld",
+ (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count);
}
if (GFC_DESCRIPTOR_RANK (b) == 1)
bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
/* bystride should never be used for 1-dimensional b.
- in case it is we want it to cause a segfault, rather than
- an incorrect result. */
- bystride = 0xDEADBEEF;
+ The value is only used for calculation of the
+ memory by the buffer. */
+ bystride = 256;
ycount = 1;
}
else
if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
{
assert (gemm != NULL);
- gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+ const char *transa, *transb;
+ if (try_blas & 2)
+ transa = "C";
+ else
+ transa = axstride == 1 ? "N" : "T";
+
+ if (try_blas & 4)
+ transb = "C";
+ else
+ transb = bxstride == 1 ? "N" : "T";
+
+ gemm (transa, transb , &m,
&n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest,
&ldc, 1, 1);
return;
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+ GFC_COMPLEX_8 *t1;
a = abase;
b = bbase;
b_offset = 1 + b_dim1;
b -= b_offset;
+ /* Empty c first. */
+ for (j=1; j<=n; j++)
+ for (i=1; i<=m; i++)
+ c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1-1) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wvla"
- GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */
-#pragma GCC diagnostic pop
-
- /* Empty c first. */
- for (j=1; j<=n; j++)
- for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
/* Start turning the crank. */
i1 = n;
}
}
}
+ free(t1);
return;
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
}
#endif /* HAVE_AVX */
}
+ else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
+ {
+#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
+ if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
+ {
+ matmul_fn = matmul_c8_avx128_fma3;
+ goto store;
+ }
+#endif
+#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
+ if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
+ {
+ matmul_fn = matmul_c8_avx128_fma4;
+ goto store;
+ }
+#endif
+
+ }
store:
__atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
}
arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
if (arg_extent != ret_extent)
- runtime_error ("Incorrect extent in return array in"
- " MATMUL intrinsic: is %ld, should be %ld",
+ runtime_error ("Array bound mismatch for dimension 1 of "
+ "array (%ld/%ld) ",
(long int) ret_extent, (long int) arg_extent);
}
else if (GFC_DESCRIPTOR_RANK (b) == 1)
arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
if (arg_extent != ret_extent)
- runtime_error ("Incorrect extent in return array in"
- " MATMUL intrinsic: is %ld, should be %ld",
+ runtime_error ("Array bound mismatch for dimension 1 of "
+ "array (%ld/%ld) ",
(long int) ret_extent, (long int) arg_extent);
}
else
arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
if (arg_extent != ret_extent)
- runtime_error ("Incorrect extent in return array in"
- " MATMUL intrinsic for dimension 1:"
- " is %ld, should be %ld",
+ runtime_error ("Array bound mismatch for dimension 1 of "
+ "array (%ld/%ld) ",
(long int) ret_extent, (long int) arg_extent);
arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
if (arg_extent != ret_extent)
- runtime_error ("Incorrect extent in return array in"
- " MATMUL intrinsic for dimension 2:"
- " is %ld, should be %ld",
+ runtime_error ("Array bound mismatch for dimension 2 of "
+ "array (%ld/%ld) ",
(long int) ret_extent, (long int) arg_extent);
}
}
if (count != GFC_DESCRIPTOR_EXTENT(b,0))
{
if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
- runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+ runtime_error ("Incorrect extent in argument B in MATMUL intrinsic "
+ "in dimension 1: is %ld, should be %ld",
+ (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count);
}
if (GFC_DESCRIPTOR_RANK (b) == 1)
bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
/* bystride should never be used for 1-dimensional b.
- in case it is we want it to cause a segfault, rather than
- an incorrect result. */
- bystride = 0xDEADBEEF;
+ The value is only used for calculation of the
+ memory by the buffer. */
+ bystride = 256;
ycount = 1;
}
else
if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
{
assert (gemm != NULL);
- gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+ const char *transa, *transb;
+ if (try_blas & 2)
+ transa = "C";
+ else
+ transa = axstride == 1 ? "N" : "T";
+
+ if (try_blas & 4)
+ transb = "C";
+ else
+ transb = bxstride == 1 ? "N" : "T";
+
+ gemm (transa, transb , &m,
&n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest,
&ldc, 1, 1);
return;
f13, f14, f23, f24, f33, f34, f43, f44;
index_type i, j, l, ii, jj, ll;
index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+ GFC_COMPLEX_8 *t1;
a = abase;
b = bbase;
b_offset = 1 + b_dim1;
b -= b_offset;
+ /* Empty c first. */
+ for (j=1; j<=n; j++)
+ for (i=1; i<=m; i++)
+ c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+
/* Early exit if possible */
if (m == 0 || n == 0 || k == 0)
return;
/* Adjust size of t1 to what is needed. */
- index_type t1_dim;
- t1_dim = (a_dim1-1) * 256 + b_dim1;
+ index_type t1_dim, a_sz;
+ if (aystride == 1)
+ a_sz = rystride;
+ else
+ a_sz = a_dim1;
+
+ t1_dim = a_sz * 256 + b_dim1;
if (t1_dim > 65536)
t1_dim = 65536;
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wvla"
- GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */
-#pragma GCC diagnostic pop
-
- /* Empty c first. */
- for (j=1; j<=n; j++)
- for (i=1; i<=m; i++)
- c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
/* Start turning the crank. */
i1 = n;
}
}
}
+ free(t1);
return;
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)