int blas_limit, blas_call gemm);
export_proto(matmul_i1);
-
-
-
/* Put exhaustive list of possible architectures here, ORed together. */
#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
static void
matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
- int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+ int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
static void
matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
#endif /* HAVE_AVX512F */
#ifdef HAVE_AVX2
- if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+ && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
{
matmul_p = matmul_i1_avx2;
goto tailcall;