gfc_code_walker (&ns->code, convert_elseif, dummy_expr_callback, NULL);
gfc_code_walker (&ns->code, cfe_code, cfe_expr_0, NULL);
gfc_code_walker (&ns->code, optimize_code, optimize_expr, NULL);
- if (flag_inline_matmul_limit != 0 || flag_external_blas)
+ if (flag_inline_matmul_limit != 0 || flag_external_blas
+ || flag_external_blas64)
{
bool found;
do
NULL);
}
- if (flag_external_blas)
+ if (flag_external_blas || flag_external_blas64)
gfc_code_walker (&ns->code, call_external_blas, dummy_expr_callback,
NULL);
enum matrix_case m_case;
bool realloc_c;
gfc_code **next_code_point;
+ int arg_kind;
/* Many of the tests for inline matmul also apply here. */
transb, 1);
actual->next = next;
+ if (flag_external_blas)
+ arg_kind = gfc_integer_4_kind;
+ else
+ {
+ gcc_assert (flag_external_blas64);
+ arg_kind = gfc_integer_8_kind;
+ }
+
c1 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (a->expr), 1,
- gfc_integer_4_kind);
+ arg_kind);
c2 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (b->expr), 2,
- gfc_integer_4_kind);
-
+ arg_kind);
b1 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (b->expr), 1,
- gfc_integer_4_kind);
+ arg_kind);
/* Argument M. */
actual = next;
actual = next;
next = gfc_get_actual_arglist ();
next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (matrix_a),
- 1, gfc_integer_4_kind);
+ 1, arg_kind);
actual->next = next;
/* Argument B. */
actual = next;
next = gfc_get_actual_arglist ();
next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (matrix_b),
- 1, gfc_integer_4_kind);
+ 1, arg_kind);
actual->next = next;
/* Argument BETA - set to zero. */
actual = next;
next = gfc_get_actual_arglist ();
next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (expr1),
- 1, gfc_integer_4_kind);
+ 1, arg_kind);
actual->next = next;
return 0;
#define gfc_integer_4_kind 4
#define gfc_real_4_kind 4
+#define gfc_integer_8_kind 8
+
/* symbol.cc */
void gfc_clear_new_implicit (void);
bool gfc_add_new_implicit_range (int, int);
-fbounds-check -ftail-call-workaround -ftail-call-workaround=@var{n}
-fcheck-array-temporaries
-fcheck=<all|array-temps|bits|bounds|do|mem|pointer|recursion>
--fcoarray=<none|single|lib> -fexternal-blas -ff2c
+-fcoarray=<none|single|lib> -fexternal-blas -fexternal-blas64 -ff2c
-ffrontend-loop-interchange -ffrontend-optimize
-finit-character=@var{n} -finit-integer=@var{n} -finit-local-zero
-finit-derived -finit-logical=<true|false>
algorithms, if the size of the matrices involved is larger than a given
limit (see @option{-fblas-matmul-limit}). This may be profitable if an
optimized vendor BLAS library is available. The BLAS library has
-to be specified at link time.
+to be specified at link time. This option specifies a BLAS library
+with integer arguments of default kind (32 bits). It cannot be used
+together with @option{-fexternal-blas64}.
+
+@opindex fexternal-blas64
+@item -fexternal-blas64
+This option makes @command{gfortran} generate calls to BLAS functions
+for some matrix operations like @code{MATMUL}, instead of using our own
+algorithms, if the size of the matrices involved is larger than a given
+limit (see @option{-fblas-matmul-limit}). This may be profitable if an
+optimized vendor BLAS library is available. The BLAS library has
+to be specified at link time. This option specifies a BLAS library
+with integer arguments of @code{KIND=8} (64 bits). It cannot be used
+together with @option{-fexternal-blas}.
@opindex fblas-matmul-limit
@item -fblas-matmul-limit=@var{n}
-Only significant when @option{-fexternal-blas} is in effect.
-Matrix multiplication of matrices with size larger than (or equal to) @var{n}
-is performed by calls to BLAS functions, while others are
+Only significant when @option{-fexternal-blas} or @option{-fexternal-blas64}
+is in effect. Matrix multiplication of matrices with size larger than or equal
+to @var{n} is performed by calls to BLAS functions, while others are
handled by @command{gfortran} internal algorithms. If the matrices
involved are not square, the size comparison is performed using the
geometric mean of the dimensions of the argument and result matrices.
Fortran Var(flag_external_blas)
Specify that an external BLAS library should be used for matmul calls on large-size arrays.
+fexternal-blas64
+Fortran Var(flag_external_blas64)
+Use an external BLAS library with 64-bit indexing for matmul on large-size arrays.
+
ff2c
Fortran Var(flag_f2c)
Use f2c calling convention.
fexternal-blas
LangUrlSuffix_Fortran(gfortran/Code-Gen-Options.html#index-fexternal-blas)
+fexternal-blas64
+LangUrlSuffix_Fortran(gfortran/Code-Gen-Options.html#index-fexternal-blas64)
+
ff2c
LangUrlSuffix_Fortran(gfortran/Code-Gen-Options.html#index-ff2c)
flag_inline_matmul_limit = 30;
}
- /* Optimization implies front end optimization, unless the user
+ /* We can only have a 32-bit or a 64-bit version of BLAS, not both. */
+
+ if (flag_external_blas && flag_external_blas64)
+ gfc_fatal_error ("32- and 64-bit version of BLAS cannot both be specified");
+
+ /* Optimization implies front end optimization, unless the user
specified it directly. */
if (flag_frontend_optimize == -1)
--- /dev/null
+! { dg-do compile }
+! { dg-options "-ffrontend-optimize -fexternal-blas64 -fdump-tree-original" }
+! PR 121161 - option for 64-bit BLAS for MATMUL.
+! Check this by making sure there is no KIND=4 integer.
+subroutine foo(a,b,c,n)
+ implicit none
+ integer(kind=8) :: n
+ real, dimension(n,n) :: a, b, c
+ c = matmul(a,b)
+end subroutine foo
+! { dg-final { scan-tree-dump-not "integer\\(kind=4\\)" "original" } }
+! { dg-final { scan-tree-dump-times "sgemm" 1 "original" } }