]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgfortran/m4/matmul.m4
Update copyright years.
[thirdparty/gcc.git] / libgfortran / m4 / matmul.m4
CommitLineData
4ee9c684 1`/* Implementation of the MATMUL intrinsic
fbd26352 2 Copyright (C) 2002-2019 Free Software Foundation, Inc.
4ee9c684 3 Contributed by Paul Brook <paul@nowt.org>
4
553877d9 5This file is part of the GNU Fortran runtime library (libgfortran).
4ee9c684 6
7Libgfortran is free software; you can redistribute it and/or
b417ea8c 8modify it under the terms of the GNU General Public
4ee9c684 9License as published by the Free Software Foundation; either
6bc9506f 10version 3 of the License, or (at your option) any later version.
4ee9c684 11
12Libgfortran is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
b417ea8c 15GNU General Public License for more details.
4ee9c684 16
6bc9506f 17Under Section 7 of GPL version 3, you are granted additional
18permissions described in the GCC Runtime Library Exception, version
193.1, as published by the Free Software Foundation.
20
21You should have received a copy of the GNU General Public License and
22a copy of the GCC Runtime Library Exception along with this program;
23see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24<http://www.gnu.org/licenses/>. */
4ee9c684 25
41f2d5e8 26#include "libgfortran.h"
4e867f90 27#include <string.h>
41f2d5e8 28#include <assert.h>'
29
cdafa1f6 30include(iparm.m4)dnl
4ee9c684 31
0a6b5f6b 32`#if defined (HAVE_'rtype_name`)
920e54ef 33
4e8e57b0 34/* Prototype for the BLAS ?gemm subroutine, a pointer to which can be
a7c1a652 35 passed to us by the front-end, in which case we call it for large
4e8e57b0 36 matrices. */
37
38typedef void (*blas_call)(const char *, const char *, const int *, const int *,
0a6b5f6b 39 const int *, const 'rtype_name` *, const 'rtype_name` *,
40 const int *, const 'rtype_name` *, const int *,
41 const 'rtype_name` *, 'rtype_name` *, const int *,
4e8e57b0 42 int, int);
43
018ef8b8 44/* The order of loops is different in the case of plain matrix
45 multiplication C=MATMUL(A,B), and in the frequent special case where
46 the argument A is the temporary result of a TRANSPOSE intrinsic:
47 C=MATMUL(TRANSPOSE(A),B). Transposed temporaries are detected by
48 looking at their strides.
49
50 The equivalent Fortran pseudo-code is:
4e867f90 51
52 DIMENSION A(M,COUNT), B(COUNT,N), C(M,N)
018ef8b8 53 IF (.NOT.IS_TRANSPOSED(A)) THEN
54 C = 0
55 DO J=1,N
56 DO K=1,COUNT
57 DO I=1,M
58 C(I,J) = C(I,J)+A(I,K)*B(K,J)
59 ELSE
60 DO J=1,N
4e867f90 61 DO I=1,M
018ef8b8 62 S = 0
63 DO K=1,COUNT
4e8e57b0 64 S = S+A(I,K)*B(K,J)
018ef8b8 65 C(I,J) = S
66 ENDIF
4e867f90 67*/
68
4e8e57b0 69/* If try_blas is set to a nonzero value, then the matmul function will
70 see if there is a way to perform the matrix multiplication by a call
71 to the BLAS gemm function. */
72
0a6b5f6b 73extern void matmul_'rtype_code` ('rtype` * const restrict retarray,
74 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
4e8e57b0 75 int blas_limit, blas_call gemm);
0a6b5f6b 76export_proto(matmul_'rtype_code`);
7b6cb5bd 77
25df644f 78/* Put exhaustive list of possible architectures here, ORed together. */
c0d02c82 79
25df644f 80#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
a7c1a652 81
25df644f 82#ifdef HAVE_AVX
83'define(`matmul_name',`matmul_'rtype_code`_avx')dnl
84`static void
85'matmul_name` ('rtype` * const restrict retarray,
86 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
87 int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
88static' include(matmul_internal.m4)dnl
89`#endif /* HAVE_AVX */
90
91#ifdef HAVE_AVX2
92'define(`matmul_name',`matmul_'rtype_code`_avx2')dnl
93`static void
94'matmul_name` ('rtype` * const restrict retarray,
95 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
69077976 96 int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
25df644f 97static' include(matmul_internal.m4)dnl
98`#endif /* HAVE_AVX2 */
99
100#ifdef HAVE_AVX512F
101'define(`matmul_name',`matmul_'rtype_code`_avx512f')dnl
102`static void
103'matmul_name` ('rtype` * const restrict retarray,
104 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
105 int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
106static' include(matmul_internal.m4)dnl
107`#endif /* HAVE_AVX512F */
4ee9c684 108
b4e409cb 109/* AMD-specific functions with AVX128 and FMA3/FMA4. */
110
111#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
112'define(`matmul_name',`matmul_'rtype_code`_avx128_fma3')dnl
113`void
114'matmul_name` ('rtype` * const restrict retarray,
115 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
116 int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
117internal_proto('matmul_name`);
118#endif
119
120#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
121'define(`matmul_name',`matmul_'rtype_code`_avx128_fma4')dnl
122`void
123'matmul_name` ('rtype` * const restrict retarray,
124 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
125 int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
126internal_proto('matmul_name`);
127#endif
128
25df644f 129/* Function to fall back to if there is no special processor-specific version. */
130'define(`matmul_name',`matmul_'rtype_code`_vanilla')dnl
131`static' include(matmul_internal.m4)dnl
4e867f90 132
25df644f 133`/* Compiling main function, with selection code for the processor. */
4e867f90 134
25df644f 135/* Currently, this is i386 only. Adjust for other architectures. */
4e867f90 136
25df644f 137#include <config/i386/cpuinfo.h>
138void matmul_'rtype_code` ('rtype` * const restrict retarray,
139 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
140 int blas_limit, blas_call gemm)
141{
142 static void (*matmul_p) ('rtype` * const restrict retarray,
143 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
89594431 144 int blas_limit, blas_call gemm);
145
146 void (*matmul_fn) ('rtype` * const restrict retarray,
147 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
148 int blas_limit, blas_call gemm);
4e867f90 149
89594431 150 matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
151 if (matmul_fn == NULL)
a7c1a652 152 {
89594431 153 matmul_fn = matmul_'rtype_code`_vanilla;
25df644f 154 if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
4e867f90 155 {
25df644f 156 /* Run down the available processors in order of preference. */
157#ifdef HAVE_AVX512F
158 if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
4e867f90 159 {
89594431 160 matmul_fn = matmul_'rtype_code`_avx512f;
161 goto store;
4e867f90 162 }
018ef8b8 163
25df644f 164#endif /* HAVE_AVX512F */
dd5b9961 165
25df644f 166#ifdef HAVE_AVX2
69077976 167 if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
168 && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
018ef8b8 169 {
89594431 170 matmul_fn = matmul_'rtype_code`_avx2;
171 goto store;
018ef8b8 172 }
609b676a 173
25df644f 174#endif
018ef8b8 175
25df644f 176#ifdef HAVE_AVX
177 if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
178 {
89594431 179 matmul_fn = matmul_'rtype_code`_avx;
180 goto store;
018ef8b8 181 }
25df644f 182#endif /* HAVE_AVX */
183 }
b4e409cb 184 else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
185 {
186#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
187 if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
188 && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
189 {
190 matmul_fn = matmul_'rtype_code`_avx128_fma3;
191 goto store;
192 }
193#endif
194#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
195 if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
196 && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
197 {
198 matmul_fn = matmul_'rtype_code`_avx128_fma4;
199 goto store;
200 }
201#endif
202
203 }
89594431 204 store:
205 __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
25df644f 206 }
207
89594431 208 (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
25df644f 209}
210
211#else /* Just the vanilla function. */
212
213'define(`matmul_name',`matmul_'rtype_code)dnl
214define(`target_attribute',`')dnl
215include(matmul_internal.m4)dnl
216`#endif
a7c1a652 217#endif
25df644f 218'