From 5d98fe096b5d17021875806ffc32ba41ea0e87b0 Mon Sep 17 00:00:00 2001
From: Thomas Koenig <tkoenig@gcc.gnu.org>
Date: Tue, 24 Sep 2024 21:51:42 +0200
Subject: [PATCH] Implement MATMUL and DOT_PRODUCT for unsigned.

gcc/fortran/ChangeLog:

	* arith.cc (gfc_arith_uminus): Fix warning.
	(gfc_arith_minus): Correctly truncate unsigneds.
	* check.cc (gfc_check_dot_product): Handle unsigned arguments.
	(gfc_check_matmul): Likewise.
	* expr.cc (gfc_get_unsigned_expr): New function.
	* gfortran.h (gfc_get_unsigned_expr): Add prototype.
	* iresolve.cc (gfc_resolve_matmul): If using UNSIGNED, use the
	signed integer version.
	* gfortran.texi: Document MATMUL and DOT_PRODUCT for unsigned.
	* simplify.cc (compute_dot_product): Handle unsigneds.

libgfortran/ChangeLog:

	* m4/iparm.m4: Add UNSIGNED if type is m.
	* m4/matmul.m4: If type is GFC_INTEGER, use GFC_UINTEGER instead.
	Whitespace fixes.
	* m4/matmul_internal.m4: Whitespace fixes.
	* generated/matmul_c10.c: Regenerated.
	* generated/matmul_c16.c: Regenerated.
	* generated/matmul_c17.c: Regenerated.
	* generated/matmul_c4.c: Regenerated.
	* generated/matmul_c8.c: Regenerated.
	* generated/matmul_i1.c: Regenerated.
	* generated/matmul_i16.c: Regenerated.
	* generated/matmul_i2.c: Regenerated.
	* generated/matmul_i4.c: Regenerated.
	* generated/matmul_i8.c: Regenerated.
	* generated/matmul_r10.c: Regenerated.
	* generated/matmul_r16.c: Regenerated.
	* generated/matmul_r17.c: Regenerated.
	* generated/matmul_r4.c: Regenerated.
	* generated/matmul_r8.c: Regenerated.
	* libgfortran.h: Add array types for unsigned.

gcc/testsuite/ChangeLog:

	* gfortran.dg/unsigned_25.f90: New test.
	* gfortran.dg/unsigned_26.f90: New test.
---
 gcc/fortran/arith.cc                      |  24 +-
 gcc/fortran/check.cc                      |  15 +-
 gcc/fortran/expr.cc                       |  13 +
 gcc/fortran/gfortran.h                    |   1 +
 gcc/fortran/gfortran.texi                 |   1 +
 gcc/fortran/iresolve.cc                   |  11 +-
 gcc/fortran/simplify.cc                   |  10 +-
 gcc/testsuite/gfortran.dg/unsigned_25.f90 |  35 +++
 gcc/testsuite/gfortran.dg/unsigned_26.f90 |  40 +++
 libgfortran/generated/matmul_c10.c        |  36 +--
 libgfortran/generated/matmul_c16.c        |  36 +--
 libgfortran/generated/matmul_c17.c        |  36 +--
 libgfortran/generated/matmul_c4.c         |  36 +--
 libgfortran/generated/matmul_c8.c         |  36 +--
 libgfortran/generated/matmul_i1.c         | 346 +++++++++++-----------
 libgfortran/generated/matmul_i16.c        | 346 +++++++++++-----------
 libgfortran/generated/matmul_i2.c         | 346 +++++++++++-----------
 libgfortran/generated/matmul_i4.c         | 346 +++++++++++-----------
 libgfortran/generated/matmul_i8.c         | 346 +++++++++++-----------
 libgfortran/generated/matmul_r10.c        |  36 +--
 libgfortran/generated/matmul_r16.c        |  36 +--
 libgfortran/generated/matmul_r17.c        |  36 +--
 libgfortran/generated/matmul_r4.c         |  36 +--
 libgfortran/generated/matmul_r8.c         |  36 +--
 libgfortran/libgfortran.h                 |   7 +
 libgfortran/m4/iparm.m4                   |   2 +-
 libgfortran/m4/matmul.m4                  |  29 +-
 libgfortran/m4/matmul_internal.m4         |   2 +-
 28 files changed, 1207 insertions(+), 1073 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/unsigned_25.f90
 create mode 100644 gcc/testsuite/gfortran.dg/unsigned_26.f90

diff --git a/gcc/fortran/arith.cc b/gcc/fortran/arith.cc
index 66a3635404a9..a214b8bc1b37 100644
--- a/gcc/fortran/arith.cc
+++ b/gcc/fortran/arith.cc
@@ -711,17 +711,9 @@ gfc_arith_uminus (gfc_expr *op1, gfc_expr **resultp)
     case BT_UNSIGNED:
       {
 	if (pedantic)
-	  return ARITH_UNSIGNED_NEGATIVE;
+	  return check_result (ARITH_UNSIGNED_NEGATIVE, op1, result, resultp);
 
-	arith neg_rc;
 	mpz_neg (result->value.integer, op1->value.integer);
-	neg_rc = gfc_range_check (result);
-	if (neg_rc != ARITH_OK)
-	  gfc_warning (0, gfc_arith_error (neg_rc),
&result->where); - - gfc_reduce_unsigned (result); - if (pedantic) - rc = neg_rc; } break; @@ -738,7 +730,15 @@ gfc_arith_uminus (gfc_expr *op1, gfc_expr **resultp) } rc = gfc_range_check (result); - + if (op1->ts.type == BT_UNSIGNED) + { + if (rc != ARITH_OK) + { + gfc_warning (0, gfc_arith_error (rc), &op1->where); + rc = ARITH_OK; + } + gfc_reduce_unsigned (result); + } return check_result (rc, op1, result, resultp); } @@ -799,8 +799,12 @@ gfc_arith_minus (gfc_expr *op1, gfc_expr *op2, gfc_expr **resultp) switch (op1->ts.type) { case BT_INTEGER: + mpz_sub (result->value.integer, op1->value.integer, op2->value.integer); + break; + case BT_UNSIGNED: mpz_sub (result->value.integer, op1->value.integer, op2->value.integer); + gfc_reduce_unsigned (result); break; case BT_REAL: diff --git a/gcc/fortran/check.cc b/gcc/fortran/check.cc index cfafdb7974f9..7c630dd73f43 100644 --- a/gcc/fortran/check.cc +++ b/gcc/fortran/check.cc @@ -2804,6 +2804,10 @@ gfc_check_dot_product (gfc_expr *vector_a, gfc_expr *vector_b) return false; break; + case BT_UNSIGNED: + /* Check comes later. */ + break; + default: gfc_error ("%qs argument of %qs intrinsic at %L must be numeric " "or LOGICAL", gfc_current_intrinsic_arg[0]->name, @@ -2811,6 +2815,14 @@ gfc_check_dot_product (gfc_expr *vector_a, gfc_expr *vector_b) return false; } + if (gfc_invalid_unsigned_ops (vector_a, vector_b)) + { + gfc_error ("Argument types of %qs intrinsic at %L must match (%s/%s)", + gfc_current_intrinsic, &vector_a->where, + gfc_typename(&vector_a->ts), gfc_typename(&vector_b->ts)); + return false; + } + if (!rank_check (vector_a, 0, 1)) return false; @@ -4092,7 +4104,8 @@ gfc_check_matmul (gfc_expr *matrix_a, gfc_expr *matrix_b) } if ((matrix_a->ts.type == BT_LOGICAL && gfc_numeric_ts (&matrix_b->ts)) - || (gfc_numeric_ts (&matrix_a->ts) && matrix_b->ts.type == BT_LOGICAL)) + || (gfc_numeric_ts (&matrix_a->ts) && matrix_b->ts.type == BT_LOGICAL) + || gfc_invalid_unsigned_ops (matrix_a, matrix_b)) { gfc_error ("Argument types of %qs intrinsic at %L must match (%s/%s)", gfc_current_intrinsic, &matrix_a->where, diff --git a/gcc/fortran/expr.cc b/gcc/fortran/expr.cc index 81c641e23224..36baa9bb4c8e 100644 --- a/gcc/fortran/expr.cc +++ b/gcc/fortran/expr.cc @@ -224,6 +224,19 @@ gfc_get_int_expr (int kind, locus *where, HOST_WIDE_INT value) return p; } +/* Get a new expression node that is an unsigned constant. */ + +gfc_expr * +gfc_get_unsigned_expr (int kind, locus *where, HOST_WIDE_INT value) +{ + gfc_expr *p; + p = gfc_get_constant_expr (BT_UNSIGNED, kind, + where ? where : &gfc_current_locus); + const wide_int w = wi::shwi (value, kind * BITS_PER_UNIT); + wi::to_mpz (w, p->value.integer, UNSIGNED); + + return p; +} /* Get a new expression node that is a logical constant. 
*/

diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index 66c9736122a9..917866a7ef0c 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -3794,6 +3794,7 @@ gfc_expr *gfc_get_structure_constructor_expr (bt, int, locus *);
 gfc_expr *gfc_get_constant_expr (bt, int, locus *);
 gfc_expr *gfc_get_character_expr (int, locus *, const char *, gfc_charlen_t len);
 gfc_expr *gfc_get_int_expr (int, locus *, HOST_WIDE_INT);
+gfc_expr *gfc_get_unsigned_expr (int, locus *, HOST_WIDE_INT);
 gfc_expr *gfc_get_logical_expr (int, locus *, bool);
 gfc_expr *gfc_get_iokind_expr (locus *, io_kind);
 
diff --git a/gcc/fortran/gfortran.texi b/gcc/fortran/gfortran.texi
index 60c93d7fe544..829ab00c6653 100644
--- a/gcc/fortran/gfortran.texi
+++ b/gcc/fortran/gfortran.texi
@@ -2788,6 +2788,7 @@ As of now, the following intrinsics take unsigned arguments:
 @item @code{MVBITS}
 @item @code{RANGE}
 @item @code{TRANSFER}
+@item @code{MATMUL} and @code{DOT_PRODUCT}
 @end itemize
 This list will grow in the near future.
 @c ---------------------------------------------------------------------
diff --git a/gcc/fortran/iresolve.cc b/gcc/fortran/iresolve.cc
index 4f1fa977f6a9..32b31432e58b 100644
--- a/gcc/fortran/iresolve.cc
+++ b/gcc/fortran/iresolve.cc
@@ -1600,6 +1600,7 @@ void
 gfc_resolve_matmul (gfc_expr *f, gfc_expr *a, gfc_expr *b)
 {
   gfc_expr temp;
+  bt type;
 
   if (a->ts.type == BT_LOGICAL && b->ts.type == BT_LOGICAL)
     {
@@ -1648,8 +1649,16 @@ gfc_resolve_matmul (gfc_expr *f, gfc_expr *a, gfc_expr *b)
 	}
     }
 
+  /* We use the same library version of matmul for INTEGER and UNSIGNED,
+     so we call the INTEGER version.  */
+
+  if (f->ts.type == BT_UNSIGNED)
+    type = BT_INTEGER;
+  else
+    type = f->ts.type;
+
   f->value.function.name
-    = gfc_get_string (PREFIX ("matmul_%c%d"), gfc_type_letter (f->ts.type),
+    = gfc_get_string (PREFIX ("matmul_%c%d"), gfc_type_letter (type),
 		      gfc_type_abi_kind (&f->ts));
 }
 
diff --git a/gcc/fortran/simplify.cc b/gcc/fortran/simplify.cc
index febf60e4d312..83d0fdc9ea93 100644
--- a/gcc/fortran/simplify.cc
+++ b/gcc/fortran/simplify.cc
@@ -420,13 +420,20 @@ compute_dot_product (gfc_expr *matrix_a, int stride_a, int offset_a,
 {
   gfc_expr *result, *a, *b, *c;
 
-  /* Set result to an INTEGER(1) 0 for numeric types and .false. for
+  /* Set result to an UNSIGNED of the correct kind for unsigned,
+     INTEGER(1) 0 for other numeric types, and .false. for
     LOGICAL.  Mixed-mode math in the loop will promote result to the
     correct type and kind.  */
   if (matrix_a->ts.type == BT_LOGICAL)
     result = gfc_get_logical_expr (gfc_default_logical_kind, NULL, false);
+  else if (matrix_a->ts.type == BT_UNSIGNED)
+    {
+      int kind = MAX (matrix_a->ts.kind, matrix_b->ts.kind);
+      result = gfc_get_unsigned_expr (kind, NULL, 0);
+    }
   else
     result = gfc_get_int_expr (1, NULL, 0);
+  result->where = matrix_a->where;
 
   a = gfc_constructor_lookup_expr (matrix_a->value.constructor, offset_a);
@@ -446,6 +453,7 @@ compute_dot_product (gfc_expr *matrix_a, int stride_a, int offset_a,
       case BT_INTEGER:
       case BT_REAL:
       case BT_COMPLEX:
+      case BT_UNSIGNED:
 	if (conj_a && a->ts.type == BT_COMPLEX)
 	  c = gfc_simplify_conjg (a);
 	else
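(Not part of the patch: a minimal usage sketch of what the front-end hunks above enable.  With -funsigned, MATMUL and DOT_PRODUCT now accept UNSIGNED arrays; both operands must be UNSIGNED, and arithmetic wraps modulo 2**32 for the default kind.  The program name and values are illustrative only.)

    program demo
      implicit none
      unsigned :: m(2,2), v(2)
      m = reshape([1u, 2u, 3u, 4u], [2,2])
      v = [4294967295u, 2u]            ! 4294967295u == uint(-1)
      print *, dot_product(v, v)       ! (2**32-1)**2 + 2**2 wraps to 5
      print *, matmul(m, v)            ! each dot product reduced mod 2**32
    end program demo

Mixing an UNSIGNED operand with INTEGER or REAL is rejected by the new gfc_invalid_unsigned_ops checks in check.cc above, and the tests below verify the wraparound behavior against wide signed arithmetic.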
diff --git a/gcc/testsuite/gfortran.dg/unsigned_25.f90 b/gcc/testsuite/gfortran.dg/unsigned_25.f90
new file mode 100644
index 000000000000..f6144988d82a
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/unsigned_25.f90
@@ -0,0 +1,35 @@
+! { dg-do run }
+! { dg-options "-funsigned" }
+! Test matrix multiplication.
+program memain
+  implicit none
+  call test1
+  call test2
+contains
+  subroutine test1
+    integer, parameter :: n = 10, m = 28
+    unsigned, dimension(n,n) :: u, v, w
+    integer(kind=8), dimension(n,n) :: i, j, k
+    real(8), dimension(n,n) :: a, b
+
+    call random_number(a)
+    call random_number(b)
+    u = uint(a*2.0**m)
+    v = uint(b*2.0**m)
+    i = int(a*2.0**m,8)
+    j = int(b*2.0**m,8)
+    w = matmul(u,v)
+    k = mod(matmul(i,j),2_8**32)
+    if (any(uint(k) /= w)) error stop 1
+  end subroutine test1
+  subroutine test2
+    unsigned, parameter :: u(3,3) = reshape ([1u, uint(-2), 3u, uint(-4), &
+         5u, uint(-6), 7u, uint(-8), 9u],[3,3])
+    unsigned, parameter :: v(3,3) = 1u - u
+    unsigned, parameter :: w(3,3) = matmul(u,v)
+    integer(kind=16), dimension(3,3), parameter :: &
+         i = int(u,16), j = int(v,16)
+    integer(kind=16), dimension(3,3) :: k = matmul(i,j)
+    if (any(uint(k) /= w)) error stop 2
+  end subroutine test2
+end program memain
diff --git a/gcc/testsuite/gfortran.dg/unsigned_26.f90 b/gcc/testsuite/gfortran.dg/unsigned_26.f90
new file mode 100644
index 000000000000..b8bad9dcd32b
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/unsigned_26.f90
@@ -0,0 +1,40 @@
+! { dg-do run }
+! { dg-options "-funsigned" }
+! Test dot_product both at run time and at compile time.
+program memain
+  call test1
+  call test2
+contains
+  subroutine test1
+    integer, parameter :: n = 10
+    real(8), dimension(n) :: a, b
+    unsigned, dimension(n) :: u, v
+    integer(8), dimension(n) :: i, j
+    unsigned :: res_u
+    integer(8) :: res_i
+    integer :: k
+
+    do k=1,10
+       call random_number(a)
+       call random_number(b)
+       u = uint(a*2.0d0**32)
+       v = uint(b*2.0d0**32)
+       i = int(u,8)
+       j = int(v,8)
+       res_u = dot_product(u,v)
+       res_i = dot_product(i,j)
+       if (res_u /= uint(res_i)) error stop 1
+    end do
+  end subroutine test1
+
+  subroutine test2
+    integer, parameter :: n = 5
+    unsigned, parameter, dimension(n) :: &
+         u = [1149221887u, 214388752u, 724301838u, 1618160523u, 1783282425u], &
+         v = [1428464973u, 1887264271u, 1830319906u, 983537781u, 13514400u]
+    integer(8), parameter, dimension(n) :: i = int(u,8), j = int(v,8)
+    unsigned, parameter :: res_1 = dot_product(u,v)
+    integer(8), parameter :: res_2 = dot_product(i,j)
+    if (res_1 /= uint(res_2)) error stop 2
+  end subroutine test2
+end program
diff --git a/libgfortran/generated/matmul_c10.c b/libgfortran/generated/matmul_c10.c
index c3dbb6d7b0f1..54a8364436cf 100644
--- a/libgfortran/generated/matmul_c10.c
+++ b/libgfortran/generated/matmul_c10.c
@@ -35,10 +35,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    matrices.  */
 
 typedef void (*blas_call)(const char *, const char *, const int *, const int *,
-                          const int *, const GFC_COMPLEX_10 *, const GFC_COMPLEX_10 *,
-                          const int *, const GFC_COMPLEX_10 *, const int *,
-                          const GFC_COMPLEX_10 *, GFC_COMPLEX_10 *, const int *,
-                          int, int);
+			  const int *, const GFC_COMPLEX_10 *, const GFC_COMPLEX_10 *,
+			  const int *, const GFC_COMPLEX_10 *, const int *,
+			  const GFC_COMPLEX_10 *, GFC_COMPLEX_10 *, const int *,
+			  int, int);
 
 /* The order of loops is different in the case of plain matrix
    multiplication C=MATMUL(A,B), and in the frequent special case where
@@ -69,7 +69,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *,
    see if there is a way to perform the matrix multiplication by a
    call to the BLAS gemm function.
*/ -extern void matmul_c10 (gfc_array_c10 * const restrict retarray, +extern void matmul_c10 (gfc_array_c10 * const restrict retarray, gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas, int blas_limit, blas_call gemm); export_proto(matmul_c10); @@ -80,11 +80,11 @@ export_proto(matmul_c10); #ifdef HAVE_AVX static void -matmul_c10_avx (gfc_array_c10 * const restrict retarray, +matmul_c10_avx (gfc_array_c10 * const restrict retarray, gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx"))); static void -matmul_c10_avx (gfc_array_c10 * const restrict retarray, +matmul_c10_avx (gfc_array_c10 * const restrict retarray, gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -649,11 +649,11 @@ matmul_c10_avx (gfc_array_c10 * const restrict retarray, #ifdef HAVE_AVX2 static void -matmul_c10_avx2 (gfc_array_c10 * const restrict retarray, +matmul_c10_avx2 (gfc_array_c10 * const restrict retarray, gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma"))); static void -matmul_c10_avx2 (gfc_array_c10 * const restrict retarray, +matmul_c10_avx2 (gfc_array_c10 * const restrict retarray, gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -1218,11 +1218,11 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict retarray, #ifdef HAVE_AVX512F static void -matmul_c10_avx512f (gfc_array_c10 * const restrict retarray, +matmul_c10_avx512f (gfc_array_c10 * const restrict retarray, gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx512f"))); static void -matmul_c10_avx512f (gfc_array_c10 * const restrict retarray, +matmul_c10_avx512f (gfc_array_c10 * const restrict retarray, gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -1789,7 +1789,7 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict retarray, #if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) void -matmul_c10_avx128_fma3 (gfc_array_c10 * const restrict retarray, +matmul_c10_avx128_fma3 (gfc_array_c10 * const restrict retarray, gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); internal_proto(matmul_c10_avx128_fma3); @@ -1797,7 +1797,7 @@ internal_proto(matmul_c10_avx128_fma3); #if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) void -matmul_c10_avx128_fma4 (gfc_array_c10 * const restrict retarray, +matmul_c10_avx128_fma4 (gfc_array_c10 * const restrict retarray, gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); internal_proto(matmul_c10_avx128_fma4); @@ -1805,7 +1805,7 @@ internal_proto(matmul_c10_avx128_fma4); /* Function to fall back to if there is no special processor-specific version. 
*/ static void -matmul_c10_vanilla (gfc_array_c10 * const restrict retarray, +matmul_c10_vanilla (gfc_array_c10 * const restrict retarray, gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -2371,15 +2371,15 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict retarray, /* Currently, this is i386 only. Adjust for other architectures. */ -void matmul_c10 (gfc_array_c10 * const restrict retarray, +void matmul_c10 (gfc_array_c10 * const restrict retarray, gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - static void (*matmul_p) (gfc_array_c10 * const restrict retarray, + static void (*matmul_p) (gfc_array_c10 * const restrict retarray, gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas, int blas_limit, blas_call gemm); - void (*matmul_fn) (gfc_array_c10 * const restrict retarray, + void (*matmul_fn) (gfc_array_c10 * const restrict retarray, gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas, int blas_limit, blas_call gemm); @@ -2447,7 +2447,7 @@ void matmul_c10 (gfc_array_c10 * const restrict retarray, #else /* Just the vanilla function. */ void -matmul_c10 (gfc_array_c10 * const restrict retarray, +matmul_c10 (gfc_array_c10 * const restrict retarray, gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { diff --git a/libgfortran/generated/matmul_c16.c b/libgfortran/generated/matmul_c16.c index 230f17d6e3f0..fce4ce295f5e 100644 --- a/libgfortran/generated/matmul_c16.c +++ b/libgfortran/generated/matmul_c16.c @@ -35,10 +35,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see matrices. */ typedef void (*blas_call)(const char *, const char *, const int *, const int *, - const int *, const GFC_COMPLEX_16 *, const GFC_COMPLEX_16 *, - const int *, const GFC_COMPLEX_16 *, const int *, - const GFC_COMPLEX_16 *, GFC_COMPLEX_16 *, const int *, - int, int); + const int *, const GFC_COMPLEX_16 *, const GFC_COMPLEX_16 *, + const int *, const GFC_COMPLEX_16 *, const int *, + const GFC_COMPLEX_16 *, GFC_COMPLEX_16 *, const int *, + int, int); /* The order of loops is different in the case of plain matrix multiplication C=MATMUL(A,B), and in the frequent special case where @@ -69,7 +69,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *, see if there is a way to perform the matrix multiplication by a call to the BLAS gemm function. 
*/ -extern void matmul_c16 (gfc_array_c16 * const restrict retarray, +extern void matmul_c16 (gfc_array_c16 * const restrict retarray, gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas, int blas_limit, blas_call gemm); export_proto(matmul_c16); @@ -80,11 +80,11 @@ export_proto(matmul_c16); #ifdef HAVE_AVX static void -matmul_c16_avx (gfc_array_c16 * const restrict retarray, +matmul_c16_avx (gfc_array_c16 * const restrict retarray, gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx"))); static void -matmul_c16_avx (gfc_array_c16 * const restrict retarray, +matmul_c16_avx (gfc_array_c16 * const restrict retarray, gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -649,11 +649,11 @@ matmul_c16_avx (gfc_array_c16 * const restrict retarray, #ifdef HAVE_AVX2 static void -matmul_c16_avx2 (gfc_array_c16 * const restrict retarray, +matmul_c16_avx2 (gfc_array_c16 * const restrict retarray, gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma"))); static void -matmul_c16_avx2 (gfc_array_c16 * const restrict retarray, +matmul_c16_avx2 (gfc_array_c16 * const restrict retarray, gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -1218,11 +1218,11 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict retarray, #ifdef HAVE_AVX512F static void -matmul_c16_avx512f (gfc_array_c16 * const restrict retarray, +matmul_c16_avx512f (gfc_array_c16 * const restrict retarray, gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx512f"))); static void -matmul_c16_avx512f (gfc_array_c16 * const restrict retarray, +matmul_c16_avx512f (gfc_array_c16 * const restrict retarray, gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -1789,7 +1789,7 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict retarray, #if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) void -matmul_c16_avx128_fma3 (gfc_array_c16 * const restrict retarray, +matmul_c16_avx128_fma3 (gfc_array_c16 * const restrict retarray, gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); internal_proto(matmul_c16_avx128_fma3); @@ -1797,7 +1797,7 @@ internal_proto(matmul_c16_avx128_fma3); #if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) void -matmul_c16_avx128_fma4 (gfc_array_c16 * const restrict retarray, +matmul_c16_avx128_fma4 (gfc_array_c16 * const restrict retarray, gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); internal_proto(matmul_c16_avx128_fma4); @@ -1805,7 +1805,7 @@ internal_proto(matmul_c16_avx128_fma4); /* Function to fall back to if there is no special processor-specific version. 
*/ static void -matmul_c16_vanilla (gfc_array_c16 * const restrict retarray, +matmul_c16_vanilla (gfc_array_c16 * const restrict retarray, gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -2371,15 +2371,15 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict retarray, /* Currently, this is i386 only. Adjust for other architectures. */ -void matmul_c16 (gfc_array_c16 * const restrict retarray, +void matmul_c16 (gfc_array_c16 * const restrict retarray, gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - static void (*matmul_p) (gfc_array_c16 * const restrict retarray, + static void (*matmul_p) (gfc_array_c16 * const restrict retarray, gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas, int blas_limit, blas_call gemm); - void (*matmul_fn) (gfc_array_c16 * const restrict retarray, + void (*matmul_fn) (gfc_array_c16 * const restrict retarray, gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas, int blas_limit, blas_call gemm); @@ -2447,7 +2447,7 @@ void matmul_c16 (gfc_array_c16 * const restrict retarray, #else /* Just the vanilla function. */ void -matmul_c16 (gfc_array_c16 * const restrict retarray, +matmul_c16 (gfc_array_c16 * const restrict retarray, gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { diff --git a/libgfortran/generated/matmul_c17.c b/libgfortran/generated/matmul_c17.c index cbfd25d27250..aee0d5a15f91 100644 --- a/libgfortran/generated/matmul_c17.c +++ b/libgfortran/generated/matmul_c17.c @@ -35,10 +35,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see matrices. */ typedef void (*blas_call)(const char *, const char *, const int *, const int *, - const int *, const GFC_COMPLEX_17 *, const GFC_COMPLEX_17 *, - const int *, const GFC_COMPLEX_17 *, const int *, - const GFC_COMPLEX_17 *, GFC_COMPLEX_17 *, const int *, - int, int); + const int *, const GFC_COMPLEX_17 *, const GFC_COMPLEX_17 *, + const int *, const GFC_COMPLEX_17 *, const int *, + const GFC_COMPLEX_17 *, GFC_COMPLEX_17 *, const int *, + int, int); /* The order of loops is different in the case of plain matrix multiplication C=MATMUL(A,B), and in the frequent special case where @@ -69,7 +69,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *, see if there is a way to perform the matrix multiplication by a call to the BLAS gemm function. 
*/ -extern void matmul_c17 (gfc_array_c17 * const restrict retarray, +extern void matmul_c17 (gfc_array_c17 * const restrict retarray, gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, int blas_limit, blas_call gemm); export_proto(matmul_c17); @@ -80,11 +80,11 @@ export_proto(matmul_c17); #ifdef HAVE_AVX static void -matmul_c17_avx (gfc_array_c17 * const restrict retarray, +matmul_c17_avx (gfc_array_c17 * const restrict retarray, gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx"))); static void -matmul_c17_avx (gfc_array_c17 * const restrict retarray, +matmul_c17_avx (gfc_array_c17 * const restrict retarray, gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -649,11 +649,11 @@ matmul_c17_avx (gfc_array_c17 * const restrict retarray, #ifdef HAVE_AVX2 static void -matmul_c17_avx2 (gfc_array_c17 * const restrict retarray, +matmul_c17_avx2 (gfc_array_c17 * const restrict retarray, gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma"))); static void -matmul_c17_avx2 (gfc_array_c17 * const restrict retarray, +matmul_c17_avx2 (gfc_array_c17 * const restrict retarray, gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -1218,11 +1218,11 @@ matmul_c17_avx2 (gfc_array_c17 * const restrict retarray, #ifdef HAVE_AVX512F static void -matmul_c17_avx512f (gfc_array_c17 * const restrict retarray, +matmul_c17_avx512f (gfc_array_c17 * const restrict retarray, gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx512f"))); static void -matmul_c17_avx512f (gfc_array_c17 * const restrict retarray, +matmul_c17_avx512f (gfc_array_c17 * const restrict retarray, gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -1789,7 +1789,7 @@ matmul_c17_avx512f (gfc_array_c17 * const restrict retarray, #if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) void -matmul_c17_avx128_fma3 (gfc_array_c17 * const restrict retarray, +matmul_c17_avx128_fma3 (gfc_array_c17 * const restrict retarray, gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); internal_proto(matmul_c17_avx128_fma3); @@ -1797,7 +1797,7 @@ internal_proto(matmul_c17_avx128_fma3); #if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) void -matmul_c17_avx128_fma4 (gfc_array_c17 * const restrict retarray, +matmul_c17_avx128_fma4 (gfc_array_c17 * const restrict retarray, gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); internal_proto(matmul_c17_avx128_fma4); @@ -1805,7 +1805,7 @@ internal_proto(matmul_c17_avx128_fma4); /* Function to fall back to if there is no special processor-specific version. 
*/ static void -matmul_c17_vanilla (gfc_array_c17 * const restrict retarray, +matmul_c17_vanilla (gfc_array_c17 * const restrict retarray, gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -2371,15 +2371,15 @@ matmul_c17_vanilla (gfc_array_c17 * const restrict retarray, /* Currently, this is i386 only. Adjust for other architectures. */ -void matmul_c17 (gfc_array_c17 * const restrict retarray, +void matmul_c17 (gfc_array_c17 * const restrict retarray, gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - static void (*matmul_p) (gfc_array_c17 * const restrict retarray, + static void (*matmul_p) (gfc_array_c17 * const restrict retarray, gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, int blas_limit, blas_call gemm); - void (*matmul_fn) (gfc_array_c17 * const restrict retarray, + void (*matmul_fn) (gfc_array_c17 * const restrict retarray, gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, int blas_limit, blas_call gemm); @@ -2447,7 +2447,7 @@ void matmul_c17 (gfc_array_c17 * const restrict retarray, #else /* Just the vanilla function. */ void -matmul_c17 (gfc_array_c17 * const restrict retarray, +matmul_c17 (gfc_array_c17 * const restrict retarray, gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { diff --git a/libgfortran/generated/matmul_c4.c b/libgfortran/generated/matmul_c4.c index c8f4550b1b8e..2ab8a6f317a0 100644 --- a/libgfortran/generated/matmul_c4.c +++ b/libgfortran/generated/matmul_c4.c @@ -35,10 +35,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see matrices. */ typedef void (*blas_call)(const char *, const char *, const int *, const int *, - const int *, const GFC_COMPLEX_4 *, const GFC_COMPLEX_4 *, - const int *, const GFC_COMPLEX_4 *, const int *, - const GFC_COMPLEX_4 *, GFC_COMPLEX_4 *, const int *, - int, int); + const int *, const GFC_COMPLEX_4 *, const GFC_COMPLEX_4 *, + const int *, const GFC_COMPLEX_4 *, const int *, + const GFC_COMPLEX_4 *, GFC_COMPLEX_4 *, const int *, + int, int); /* The order of loops is different in the case of plain matrix multiplication C=MATMUL(A,B), and in the frequent special case where @@ -69,7 +69,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *, see if there is a way to perform the matrix multiplication by a call to the BLAS gemm function. 
*/ -extern void matmul_c4 (gfc_array_c4 * const restrict retarray, +extern void matmul_c4 (gfc_array_c4 * const restrict retarray, gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas, int blas_limit, blas_call gemm); export_proto(matmul_c4); @@ -80,11 +80,11 @@ export_proto(matmul_c4); #ifdef HAVE_AVX static void -matmul_c4_avx (gfc_array_c4 * const restrict retarray, +matmul_c4_avx (gfc_array_c4 * const restrict retarray, gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx"))); static void -matmul_c4_avx (gfc_array_c4 * const restrict retarray, +matmul_c4_avx (gfc_array_c4 * const restrict retarray, gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -649,11 +649,11 @@ matmul_c4_avx (gfc_array_c4 * const restrict retarray, #ifdef HAVE_AVX2 static void -matmul_c4_avx2 (gfc_array_c4 * const restrict retarray, +matmul_c4_avx2 (gfc_array_c4 * const restrict retarray, gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma"))); static void -matmul_c4_avx2 (gfc_array_c4 * const restrict retarray, +matmul_c4_avx2 (gfc_array_c4 * const restrict retarray, gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -1218,11 +1218,11 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict retarray, #ifdef HAVE_AVX512F static void -matmul_c4_avx512f (gfc_array_c4 * const restrict retarray, +matmul_c4_avx512f (gfc_array_c4 * const restrict retarray, gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx512f"))); static void -matmul_c4_avx512f (gfc_array_c4 * const restrict retarray, +matmul_c4_avx512f (gfc_array_c4 * const restrict retarray, gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -1789,7 +1789,7 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict retarray, #if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) void -matmul_c4_avx128_fma3 (gfc_array_c4 * const restrict retarray, +matmul_c4_avx128_fma3 (gfc_array_c4 * const restrict retarray, gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); internal_proto(matmul_c4_avx128_fma3); @@ -1797,7 +1797,7 @@ internal_proto(matmul_c4_avx128_fma3); #if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) void -matmul_c4_avx128_fma4 (gfc_array_c4 * const restrict retarray, +matmul_c4_avx128_fma4 (gfc_array_c4 * const restrict retarray, gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); internal_proto(matmul_c4_avx128_fma4); @@ -1805,7 +1805,7 @@ internal_proto(matmul_c4_avx128_fma4); /* Function to fall back to if there is no special processor-specific version. */ static void -matmul_c4_vanilla (gfc_array_c4 * const restrict retarray, +matmul_c4_vanilla (gfc_array_c4 * const restrict retarray, gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -2371,15 +2371,15 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict retarray, /* Currently, this is i386 only. Adjust for other architectures. 
*/ -void matmul_c4 (gfc_array_c4 * const restrict retarray, +void matmul_c4 (gfc_array_c4 * const restrict retarray, gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - static void (*matmul_p) (gfc_array_c4 * const restrict retarray, + static void (*matmul_p) (gfc_array_c4 * const restrict retarray, gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas, int blas_limit, blas_call gemm); - void (*matmul_fn) (gfc_array_c4 * const restrict retarray, + void (*matmul_fn) (gfc_array_c4 * const restrict retarray, gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas, int blas_limit, blas_call gemm); @@ -2447,7 +2447,7 @@ void matmul_c4 (gfc_array_c4 * const restrict retarray, #else /* Just the vanilla function. */ void -matmul_c4 (gfc_array_c4 * const restrict retarray, +matmul_c4 (gfc_array_c4 * const restrict retarray, gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { diff --git a/libgfortran/generated/matmul_c8.c b/libgfortran/generated/matmul_c8.c index 5c5928d824ac..fb5246ec78fb 100644 --- a/libgfortran/generated/matmul_c8.c +++ b/libgfortran/generated/matmul_c8.c @@ -35,10 +35,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see matrices. */ typedef void (*blas_call)(const char *, const char *, const int *, const int *, - const int *, const GFC_COMPLEX_8 *, const GFC_COMPLEX_8 *, - const int *, const GFC_COMPLEX_8 *, const int *, - const GFC_COMPLEX_8 *, GFC_COMPLEX_8 *, const int *, - int, int); + const int *, const GFC_COMPLEX_8 *, const GFC_COMPLEX_8 *, + const int *, const GFC_COMPLEX_8 *, const int *, + const GFC_COMPLEX_8 *, GFC_COMPLEX_8 *, const int *, + int, int); /* The order of loops is different in the case of plain matrix multiplication C=MATMUL(A,B), and in the frequent special case where @@ -69,7 +69,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *, see if there is a way to perform the matrix multiplication by a call to the BLAS gemm function. 
*/ -extern void matmul_c8 (gfc_array_c8 * const restrict retarray, +extern void matmul_c8 (gfc_array_c8 * const restrict retarray, gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas, int blas_limit, blas_call gemm); export_proto(matmul_c8); @@ -80,11 +80,11 @@ export_proto(matmul_c8); #ifdef HAVE_AVX static void -matmul_c8_avx (gfc_array_c8 * const restrict retarray, +matmul_c8_avx (gfc_array_c8 * const restrict retarray, gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx"))); static void -matmul_c8_avx (gfc_array_c8 * const restrict retarray, +matmul_c8_avx (gfc_array_c8 * const restrict retarray, gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -649,11 +649,11 @@ matmul_c8_avx (gfc_array_c8 * const restrict retarray, #ifdef HAVE_AVX2 static void -matmul_c8_avx2 (gfc_array_c8 * const restrict retarray, +matmul_c8_avx2 (gfc_array_c8 * const restrict retarray, gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma"))); static void -matmul_c8_avx2 (gfc_array_c8 * const restrict retarray, +matmul_c8_avx2 (gfc_array_c8 * const restrict retarray, gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -1218,11 +1218,11 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict retarray, #ifdef HAVE_AVX512F static void -matmul_c8_avx512f (gfc_array_c8 * const restrict retarray, +matmul_c8_avx512f (gfc_array_c8 * const restrict retarray, gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx512f"))); static void -matmul_c8_avx512f (gfc_array_c8 * const restrict retarray, +matmul_c8_avx512f (gfc_array_c8 * const restrict retarray, gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -1789,7 +1789,7 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict retarray, #if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) void -matmul_c8_avx128_fma3 (gfc_array_c8 * const restrict retarray, +matmul_c8_avx128_fma3 (gfc_array_c8 * const restrict retarray, gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); internal_proto(matmul_c8_avx128_fma3); @@ -1797,7 +1797,7 @@ internal_proto(matmul_c8_avx128_fma3); #if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) void -matmul_c8_avx128_fma4 (gfc_array_c8 * const restrict retarray, +matmul_c8_avx128_fma4 (gfc_array_c8 * const restrict retarray, gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); internal_proto(matmul_c8_avx128_fma4); @@ -1805,7 +1805,7 @@ internal_proto(matmul_c8_avx128_fma4); /* Function to fall back to if there is no special processor-specific version. */ static void -matmul_c8_vanilla (gfc_array_c8 * const restrict retarray, +matmul_c8_vanilla (gfc_array_c8 * const restrict retarray, gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -2371,15 +2371,15 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict retarray, /* Currently, this is i386 only. Adjust for other architectures. 
*/ -void matmul_c8 (gfc_array_c8 * const restrict retarray, +void matmul_c8 (gfc_array_c8 * const restrict retarray, gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - static void (*matmul_p) (gfc_array_c8 * const restrict retarray, + static void (*matmul_p) (gfc_array_c8 * const restrict retarray, gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas, int blas_limit, blas_call gemm); - void (*matmul_fn) (gfc_array_c8 * const restrict retarray, + void (*matmul_fn) (gfc_array_c8 * const restrict retarray, gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas, int blas_limit, blas_call gemm); @@ -2447,7 +2447,7 @@ void matmul_c8 (gfc_array_c8 * const restrict retarray, #else /* Just the vanilla function. */ void -matmul_c8 (gfc_array_c8 * const restrict retarray, +matmul_c8 (gfc_array_c8 * const restrict retarray, gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { diff --git a/libgfortran/generated/matmul_i1.c b/libgfortran/generated/matmul_i1.c index 7a30ad818a2c..51e020afb5cf 100644 --- a/libgfortran/generated/matmul_i1.c +++ b/libgfortran/generated/matmul_i1.c @@ -28,17 +28,17 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #include -#if defined (HAVE_GFC_INTEGER_1) +#if defined (HAVE_GFC_UINTEGER_1) /* Prototype for the BLAS ?gemm subroutine, a pointer to which can be passed to us by the front-end, in which case we call it for large matrices. */ typedef void (*blas_call)(const char *, const char *, const int *, const int *, - const int *, const GFC_INTEGER_1 *, const GFC_INTEGER_1 *, - const int *, const GFC_INTEGER_1 *, const int *, - const GFC_INTEGER_1 *, GFC_INTEGER_1 *, const int *, - int, int); + const int *, const GFC_UINTEGER_1 *, const GFC_UINTEGER_1 *, + const int *, const GFC_UINTEGER_1 *, const int *, + const GFC_UINTEGER_1 *, GFC_UINTEGER_1 *, const int *, + int, int); /* The order of loops is different in the case of plain matrix multiplication C=MATMUL(A,B), and in the frequent special case where @@ -69,8 +69,8 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *, see if there is a way to perform the matrix multiplication by a call to the BLAS gemm function. 
*/ -extern void matmul_i1 (gfc_array_i1 * const restrict retarray, - gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas, +extern void matmul_i1 (gfc_array_m1 * const restrict retarray, + gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas, int blas_limit, blas_call gemm); export_proto(matmul_i1); @@ -80,17 +80,17 @@ export_proto(matmul_i1); #ifdef HAVE_AVX static void -matmul_i1_avx (gfc_array_i1 * const restrict retarray, - gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas, +matmul_i1_avx (gfc_array_m1 * const restrict retarray, + gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx"))); static void -matmul_i1_avx (gfc_array_i1 * const restrict retarray, - gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas, +matmul_i1_avx (gfc_array_m1 * const restrict retarray, + gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_1 * restrict abase; - const GFC_INTEGER_1 * restrict bbase; - GFC_INTEGER_1 * restrict dest; + const GFC_UINTEGER_1 * restrict abase; + const GFC_UINTEGER_1 * restrict bbase; + GFC_UINTEGER_1 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -132,7 +132,7 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_1)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_1)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -251,7 +251,7 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_1 one = 1, zero = 0; + const GFC_UINTEGER_1 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -289,8 +289,8 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_1 *a, *b; - GFC_INTEGER_1 *c; + const GFC_UINTEGER_1 *a, *b; + GFC_UINTEGER_1 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -298,11 +298,11 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_1 *t1; + GFC_UINTEGER_1 *t1; a = abase; b = bbase; @@ -322,7 +322,7 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray, /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_1)0; + c[i + j * c_dim1] = (GFC_UINTEGER_1)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -339,7 +339,7 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_1)); /* Start turning the crank. 
*/ i1 = n; @@ -557,10 +557,10 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_1 *restrict abase_x; - const GFC_INTEGER_1 *restrict bbase_y; - GFC_INTEGER_1 *restrict dest_y; - GFC_INTEGER_1 s; + const GFC_UINTEGER_1 *restrict abase_x; + const GFC_UINTEGER_1 *restrict bbase_y; + GFC_UINTEGER_1 *restrict dest_y; + GFC_UINTEGER_1 s; for (y = 0; y < ycount; y++) { @@ -569,7 +569,7 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_1) 0; + s = (GFC_UINTEGER_1) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -578,13 +578,13 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray, } else { - const GFC_INTEGER_1 *restrict bbase_y; - GFC_INTEGER_1 s; + const GFC_UINTEGER_1 *restrict bbase_y; + GFC_UINTEGER_1 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_1) 0; + s = (GFC_UINTEGER_1) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -593,13 +593,13 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_1 *restrict bbase_y; - GFC_INTEGER_1 s; + const GFC_UINTEGER_1 *restrict bbase_y; + GFC_UINTEGER_1 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_1) 0; + s = (GFC_UINTEGER_1) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -609,7 +609,7 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_1)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -621,10 +621,10 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray, } else { - const GFC_INTEGER_1 *restrict abase_x; - const GFC_INTEGER_1 *restrict bbase_y; - GFC_INTEGER_1 *restrict dest_y; - GFC_INTEGER_1 s; + const GFC_UINTEGER_1 *restrict abase_x; + const GFC_UINTEGER_1 *restrict bbase_y; + GFC_UINTEGER_1 *restrict dest_y; + GFC_UINTEGER_1 s; for (y = 0; y < ycount; y++) { @@ -633,7 +633,7 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_1) 0; + s = (GFC_UINTEGER_1) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; @@ -649,17 +649,17 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray, #ifdef HAVE_AVX2 static void -matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, - gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas, +matmul_i1_avx2 (gfc_array_m1 * const restrict retarray, + gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma"))); static void -matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, - gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas, +matmul_i1_avx2 (gfc_array_m1 * const restrict retarray, + gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_1 * restrict abase; - const GFC_INTEGER_1 * restrict bbase; - GFC_INTEGER_1 * restrict dest; + const GFC_UINTEGER_1 * restrict abase; + const GFC_UINTEGER_1 * restrict bbase; + GFC_UINTEGER_1 * restrict dest; index_type rxstride, 
rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -701,7 +701,7 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_1)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_1)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -820,7 +820,7 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_1 one = 1, zero = 0; + const GFC_UINTEGER_1 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -858,8 +858,8 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_1 *a, *b; - GFC_INTEGER_1 *c; + const GFC_UINTEGER_1 *a, *b; + GFC_UINTEGER_1 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -867,11 +867,11 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_1 *t1; + GFC_UINTEGER_1 *t1; a = abase; b = bbase; @@ -891,7 +891,7 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_1)0; + c[i + j * c_dim1] = (GFC_UINTEGER_1)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -908,7 +908,7 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_1)); /* Start turning the crank. 
*/ i1 = n; @@ -1126,10 +1126,10 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_1 *restrict abase_x; - const GFC_INTEGER_1 *restrict bbase_y; - GFC_INTEGER_1 *restrict dest_y; - GFC_INTEGER_1 s; + const GFC_UINTEGER_1 *restrict abase_x; + const GFC_UINTEGER_1 *restrict bbase_y; + GFC_UINTEGER_1 *restrict dest_y; + GFC_UINTEGER_1 s; for (y = 0; y < ycount; y++) { @@ -1138,7 +1138,7 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_1) 0; + s = (GFC_UINTEGER_1) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -1147,13 +1147,13 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, } else { - const GFC_INTEGER_1 *restrict bbase_y; - GFC_INTEGER_1 s; + const GFC_UINTEGER_1 *restrict bbase_y; + GFC_UINTEGER_1 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_1) 0; + s = (GFC_UINTEGER_1) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -1162,13 +1162,13 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_1 *restrict bbase_y; - GFC_INTEGER_1 s; + const GFC_UINTEGER_1 *restrict bbase_y; + GFC_UINTEGER_1 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_1) 0; + s = (GFC_UINTEGER_1) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -1178,7 +1178,7 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_1)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -1190,10 +1190,10 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, } else { - const GFC_INTEGER_1 *restrict abase_x; - const GFC_INTEGER_1 *restrict bbase_y; - GFC_INTEGER_1 *restrict dest_y; - GFC_INTEGER_1 s; + const GFC_UINTEGER_1 *restrict abase_x; + const GFC_UINTEGER_1 *restrict bbase_y; + GFC_UINTEGER_1 *restrict dest_y; + GFC_UINTEGER_1 s; for (y = 0; y < ycount; y++) { @@ -1202,7 +1202,7 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_1) 0; + s = (GFC_UINTEGER_1) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; @@ -1218,17 +1218,17 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, #ifdef HAVE_AVX512F static void -matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, - gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas, +matmul_i1_avx512f (gfc_array_m1 * const restrict retarray, + gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx512f"))); static void -matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, - gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas, +matmul_i1_avx512f (gfc_array_m1 * const restrict retarray, + gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_1 * restrict abase; - const GFC_INTEGER_1 * restrict bbase; - GFC_INTEGER_1 * restrict dest; + const GFC_UINTEGER_1 * restrict abase; + const GFC_UINTEGER_1 * restrict bbase; + GFC_UINTEGER_1 
* restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -1270,7 +1270,7 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_1)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_1)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -1389,7 +1389,7 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_1 one = 1, zero = 0; + const GFC_UINTEGER_1 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -1427,8 +1427,8 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_1 *a, *b; - GFC_INTEGER_1 *c; + const GFC_UINTEGER_1 *a, *b; + GFC_UINTEGER_1 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -1436,11 +1436,11 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_1 *t1; + GFC_UINTEGER_1 *t1; a = abase; b = bbase; @@ -1460,7 +1460,7 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_1)0; + c[i + j * c_dim1] = (GFC_UINTEGER_1)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -1477,7 +1477,7 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_1)); /* Start turning the crank. 
*/ i1 = n; @@ -1695,10 +1695,10 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_1 *restrict abase_x; - const GFC_INTEGER_1 *restrict bbase_y; - GFC_INTEGER_1 *restrict dest_y; - GFC_INTEGER_1 s; + const GFC_UINTEGER_1 *restrict abase_x; + const GFC_UINTEGER_1 *restrict bbase_y; + GFC_UINTEGER_1 *restrict dest_y; + GFC_UINTEGER_1 s; for (y = 0; y < ycount; y++) { @@ -1707,7 +1707,7 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_1) 0; + s = (GFC_UINTEGER_1) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -1716,13 +1716,13 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, } else { - const GFC_INTEGER_1 *restrict bbase_y; - GFC_INTEGER_1 s; + const GFC_UINTEGER_1 *restrict bbase_y; + GFC_UINTEGER_1 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_1) 0; + s = (GFC_UINTEGER_1) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -1731,13 +1731,13 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_1 *restrict bbase_y; - GFC_INTEGER_1 s; + const GFC_UINTEGER_1 *restrict bbase_y; + GFC_UINTEGER_1 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_1) 0; + s = (GFC_UINTEGER_1) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -1747,7 +1747,7 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_1)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -1759,10 +1759,10 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, } else { - const GFC_INTEGER_1 *restrict abase_x; - const GFC_INTEGER_1 *restrict bbase_y; - GFC_INTEGER_1 *restrict dest_y; - GFC_INTEGER_1 s; + const GFC_UINTEGER_1 *restrict abase_x; + const GFC_UINTEGER_1 *restrict bbase_y; + GFC_UINTEGER_1 *restrict dest_y; + GFC_UINTEGER_1 s; for (y = 0; y < ycount; y++) { @@ -1771,7 +1771,7 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_1) 0; + s = (GFC_UINTEGER_1) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; @@ -1789,29 +1789,29 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, #if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) void -matmul_i1_avx128_fma3 (gfc_array_i1 * const restrict retarray, - gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas, +matmul_i1_avx128_fma3 (gfc_array_m1 * const restrict retarray, + gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); internal_proto(matmul_i1_avx128_fma3); #endif #if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) void -matmul_i1_avx128_fma4 (gfc_array_i1 * const restrict retarray, - gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas, +matmul_i1_avx128_fma4 (gfc_array_m1 * const restrict retarray, + gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas, int blas_limit, blas_call gemm) 
__attribute__((__target__("avx,fma4"))); internal_proto(matmul_i1_avx128_fma4); #endif /* Function to fall back to if there is no special processor-specific version. */ static void -matmul_i1_vanilla (gfc_array_i1 * const restrict retarray, - gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas, +matmul_i1_vanilla (gfc_array_m1 * const restrict retarray, + gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_1 * restrict abase; - const GFC_INTEGER_1 * restrict bbase; - GFC_INTEGER_1 * restrict dest; + const GFC_UINTEGER_1 * restrict abase; + const GFC_UINTEGER_1 * restrict bbase; + GFC_UINTEGER_1 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -1853,7 +1853,7 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_1)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_1)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -1972,7 +1972,7 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_1 one = 1, zero = 0; + const GFC_UINTEGER_1 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -2010,8 +2010,8 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_1 *a, *b; - GFC_INTEGER_1 *c; + const GFC_UINTEGER_1 *a, *b; + GFC_UINTEGER_1 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -2019,11 +2019,11 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_1 *t1; + GFC_UINTEGER_1 *t1; a = abase; b = bbase; @@ -2043,7 +2043,7 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray, /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_1)0; + c[i + j * c_dim1] = (GFC_UINTEGER_1)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -2060,7 +2060,7 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_1)); /* Start turning the crank. 
*/ i1 = n; @@ -2278,10 +2278,10 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_1 *restrict abase_x; - const GFC_INTEGER_1 *restrict bbase_y; - GFC_INTEGER_1 *restrict dest_y; - GFC_INTEGER_1 s; + const GFC_UINTEGER_1 *restrict abase_x; + const GFC_UINTEGER_1 *restrict bbase_y; + GFC_UINTEGER_1 *restrict dest_y; + GFC_UINTEGER_1 s; for (y = 0; y < ycount; y++) { @@ -2290,7 +2290,7 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_1) 0; + s = (GFC_UINTEGER_1) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -2299,13 +2299,13 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray, } else { - const GFC_INTEGER_1 *restrict bbase_y; - GFC_INTEGER_1 s; + const GFC_UINTEGER_1 *restrict bbase_y; + GFC_UINTEGER_1 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_1) 0; + s = (GFC_UINTEGER_1) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -2314,13 +2314,13 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_1 *restrict bbase_y; - GFC_INTEGER_1 s; + const GFC_UINTEGER_1 *restrict bbase_y; + GFC_UINTEGER_1 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_1) 0; + s = (GFC_UINTEGER_1) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -2330,7 +2330,7 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_1)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -2342,10 +2342,10 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray, } else { - const GFC_INTEGER_1 *restrict abase_x; - const GFC_INTEGER_1 *restrict bbase_y; - GFC_INTEGER_1 *restrict dest_y; - GFC_INTEGER_1 s; + const GFC_UINTEGER_1 *restrict abase_x; + const GFC_UINTEGER_1 *restrict bbase_y; + GFC_UINTEGER_1 *restrict dest_y; + GFC_UINTEGER_1 s; for (y = 0; y < ycount; y++) { @@ -2354,7 +2354,7 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_1) 0; + s = (GFC_UINTEGER_1) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; @@ -2371,16 +2371,16 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray, /* Currently, this is i386 only. Adjust for other architectures. 
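   The run-time selection that follows uses a common pattern in these
   generated files: a static function pointer is resolved once and then
   cached with relaxed atomics, so only the first call pays for CPU
   detection.  A minimal sketch of the idea, assuming GCC's
   __builtin_cpu_supports for the detection step (the kernel names are
   placeholders, not the exact generated symbols):

      static void run_avx512f (void) {}   // processor-specific kernel
      static void run_vanilla (void) {}   // portable fallback
      static void (*fn_p) (void);

      static void
      dispatch (void)
      {
        void (*fn) (void) = __atomic_load_n (&fn_p, __ATOMIC_RELAXED);
        if (fn == NULL)
          {
            // First call: pick the best kernel for this CPU, then
            // publish the choice so later calls skip the check.
            if (__builtin_cpu_supports ("avx512f"))
              fn = run_avx512f;
            else
              fn = run_vanilla;
            __atomic_store_n (&fn_p, fn, __ATOMIC_RELAXED);
          }
        fn ();
      }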
*/ -void matmul_i1 (gfc_array_i1 * const restrict retarray, - gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas, +void matmul_i1 (gfc_array_m1 * const restrict retarray, + gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - static void (*matmul_p) (gfc_array_i1 * const restrict retarray, - gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas, + static void (*matmul_p) (gfc_array_m1 * const restrict retarray, + gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas, int blas_limit, blas_call gemm); - void (*matmul_fn) (gfc_array_i1 * const restrict retarray, - gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas, + void (*matmul_fn) (gfc_array_m1 * const restrict retarray, + gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas, int blas_limit, blas_call gemm); matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); @@ -2447,13 +2447,13 @@ void matmul_i1 (gfc_array_i1 * const restrict retarray, #else /* Just the vanilla function. */ void -matmul_i1 (gfc_array_i1 * const restrict retarray, - gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas, +matmul_i1 (gfc_array_m1 * const restrict retarray, + gfc_array_m1 * const restrict a, gfc_array_m1 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_1 * restrict abase; - const GFC_INTEGER_1 * restrict bbase; - GFC_INTEGER_1 * restrict dest; + const GFC_UINTEGER_1 * restrict abase; + const GFC_UINTEGER_1 * restrict bbase; + GFC_UINTEGER_1 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -2495,7 +2495,7 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_1)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_1)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -2614,7 +2614,7 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_1 one = 1, zero = 0; + const GFC_UINTEGER_1 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -2652,8 +2652,8 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_1 *a, *b; - GFC_INTEGER_1 *c; + const GFC_UINTEGER_1 *a, *b; + GFC_UINTEGER_1 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -2661,11 +2661,11 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_1 *t1; + GFC_UINTEGER_1 *t1; a = abase; b = bbase; @@ -2685,7 +2685,7 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_1)0; + c[i + j * c_dim1] = (GFC_UINTEGER_1)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -2702,7 +2702,7 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_1)); /* Start turning the crank. */ i1 = n; @@ -2920,10 +2920,10 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_1 *restrict abase_x; - const GFC_INTEGER_1 *restrict bbase_y; - GFC_INTEGER_1 *restrict dest_y; - GFC_INTEGER_1 s; + const GFC_UINTEGER_1 *restrict abase_x; + const GFC_UINTEGER_1 *restrict bbase_y; + GFC_UINTEGER_1 *restrict dest_y; + GFC_UINTEGER_1 s; for (y = 0; y < ycount; y++) { @@ -2932,7 +2932,7 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_1) 0; + s = (GFC_UINTEGER_1) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -2941,13 +2941,13 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, } else { - const GFC_INTEGER_1 *restrict bbase_y; - GFC_INTEGER_1 s; + const GFC_UINTEGER_1 *restrict bbase_y; + GFC_UINTEGER_1 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_1) 0; + s = (GFC_UINTEGER_1) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -2956,13 +2956,13 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_1 *restrict bbase_y; - GFC_INTEGER_1 s; + const GFC_UINTEGER_1 *restrict bbase_y; + GFC_UINTEGER_1 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_1) 0; + s = (GFC_UINTEGER_1) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -2972,7 +2972,7 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_1)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -2984,10 +2984,10 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, } else { - const GFC_INTEGER_1 *restrict abase_x; - const GFC_INTEGER_1 *restrict bbase_y; - GFC_INTEGER_1 *restrict dest_y; - GFC_INTEGER_1 s; + const GFC_UINTEGER_1 *restrict abase_x; + const GFC_UINTEGER_1 *restrict bbase_y; + GFC_UINTEGER_1 *restrict dest_y; + GFC_UINTEGER_1 s; for (y = 0; y < ycount; y++) { @@ -2996,7 +2996,7 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_1) 0; + s = (GFC_UINTEGER_1) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; diff --git a/libgfortran/generated/matmul_i16.c b/libgfortran/generated/matmul_i16.c index cf72f6565118..9a7eee4ddc94 100644 --- a/libgfortran/generated/matmul_i16.c +++ b/libgfortran/generated/matmul_i16.c @@ -28,17 +28,17 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #include -#if defined (HAVE_GFC_INTEGER_16) +#if defined (HAVE_GFC_UINTEGER_16) /* Prototype for the BLAS ?gemm subroutine, a pointer to which can be passed to us by the front-end, in which case we call it for large matrices. 
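   For illustration, a call through such a pointer for C = A*B on fully
   contiguous, column-major operands would look roughly like this (a
   sketch only: the real code derives the leading dimensions and the
   "N"/"T" flags from the array strides, a, b and c point at the operand
   data, and the two trailing ints are assumed here to be the hidden
   character-length arguments of the Fortran gemm):

      const int m = 4, n = 4, k = 4, lda = 4, ldb = 4, ldc = 4;
      const GFC_UINTEGER_16 one = 1, zero = 0;
      gemm ("N", "N", &m, &n, &k, &one, a, &lda, b, &ldb,
            &zero, c, &ldc, 1, 1);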
*/ typedef void (*blas_call)(const char *, const char *, const int *, const int *, - const int *, const GFC_INTEGER_16 *, const GFC_INTEGER_16 *, - const int *, const GFC_INTEGER_16 *, const int *, - const GFC_INTEGER_16 *, GFC_INTEGER_16 *, const int *, - int, int); + const int *, const GFC_UINTEGER_16 *, const GFC_UINTEGER_16 *, + const int *, const GFC_UINTEGER_16 *, const int *, + const GFC_UINTEGER_16 *, GFC_UINTEGER_16 *, const int *, + int, int); /* The order of loops is different in the case of plain matrix multiplication C=MATMUL(A,B), and in the frequent special case where @@ -69,8 +69,8 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *, see if there is a way to perform the matrix multiplication by a call to the BLAS gemm function. */ -extern void matmul_i16 (gfc_array_i16 * const restrict retarray, - gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas, +extern void matmul_i16 (gfc_array_m16 * const restrict retarray, + gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas, int blas_limit, blas_call gemm); export_proto(matmul_i16); @@ -80,17 +80,17 @@ export_proto(matmul_i16); #ifdef HAVE_AVX static void -matmul_i16_avx (gfc_array_i16 * const restrict retarray, - gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas, +matmul_i16_avx (gfc_array_m16 * const restrict retarray, + gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx"))); static void -matmul_i16_avx (gfc_array_i16 * const restrict retarray, - gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas, +matmul_i16_avx (gfc_array_m16 * const restrict retarray, + gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_16 * restrict abase; - const GFC_INTEGER_16 * restrict bbase; - GFC_INTEGER_16 * restrict dest; + const GFC_UINTEGER_16 * restrict abase; + const GFC_UINTEGER_16 * restrict bbase; + GFC_UINTEGER_16 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -132,7 +132,7 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_16)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_16)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -251,7 +251,7 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_16 one = 1, zero = 0; + const GFC_UINTEGER_16 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -289,8 +289,8 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. 
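   Stripped of the blocking, the kernel computes the textbook triple
   loop below; the tiled version only reorders the work for cache
   reuse.  A minimal column-major reference, with illustrative names:

      // c is m x n, a is m x k, b is k x n; leading dimensions equal
      // the row counts, so element (i,j) of c lives at c[i + j*m].
      static void
      ref_matmul (int m, int n, int k, const GFC_UINTEGER_16 *a,
                  const GFC_UINTEGER_16 *b, GFC_UINTEGER_16 *c)
      {
        for (int j = 0; j < n; j++)
          for (int i = 0; i < m; i++)
            {
              GFC_UINTEGER_16 s = 0;
              for (int l = 0; l < k; l++)
                s += a[i + l * m] * b[l + j * k];
              c[i + j * m] = s;
            }
      }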
*/ - const GFC_INTEGER_16 *a, *b; - GFC_INTEGER_16 *c; + const GFC_UINTEGER_16 *a, *b; + GFC_UINTEGER_16 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -298,11 +298,11 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_16 *t1; + GFC_UINTEGER_16 *t1; a = abase; b = bbase; @@ -322,7 +322,7 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray, /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_16)0; + c[i + j * c_dim1] = (GFC_UINTEGER_16)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -339,7 +339,7 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_16)); /* Start turning the crank. */ i1 = n; @@ -557,10 +557,10 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_16 *restrict abase_x; - const GFC_INTEGER_16 *restrict bbase_y; - GFC_INTEGER_16 *restrict dest_y; - GFC_INTEGER_16 s; + const GFC_UINTEGER_16 *restrict abase_x; + const GFC_UINTEGER_16 *restrict bbase_y; + GFC_UINTEGER_16 *restrict dest_y; + GFC_UINTEGER_16 s; for (y = 0; y < ycount; y++) { @@ -569,7 +569,7 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_16) 0; + s = (GFC_UINTEGER_16) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -578,13 +578,13 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray, } else { - const GFC_INTEGER_16 *restrict bbase_y; - GFC_INTEGER_16 s; + const GFC_UINTEGER_16 *restrict bbase_y; + GFC_UINTEGER_16 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_16) 0; + s = (GFC_UINTEGER_16) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -593,13 +593,13 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_16 *restrict bbase_y; - GFC_INTEGER_16 s; + const GFC_UINTEGER_16 *restrict bbase_y; + GFC_UINTEGER_16 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_16) 0; + s = (GFC_UINTEGER_16) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -609,7 +609,7 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_16)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -621,10 +621,10 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray, } else { - const GFC_INTEGER_16 *restrict abase_x; - const GFC_INTEGER_16 *restrict bbase_y; - GFC_INTEGER_16 *restrict dest_y; - GFC_INTEGER_16 s; + const GFC_UINTEGER_16 *restrict abase_x; + const GFC_UINTEGER_16 *restrict bbase_y; + GFC_UINTEGER_16 *restrict dest_y; + GFC_UINTEGER_16 s; for (y = 0; y < ycount; y++) { @@ -633,7 +633,7 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = 
&abase[x*axstride]; - s = (GFC_INTEGER_16) 0; + s = (GFC_UINTEGER_16) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; @@ -649,17 +649,17 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray, #ifdef HAVE_AVX2 static void -matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, - gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas, +matmul_i16_avx2 (gfc_array_m16 * const restrict retarray, + gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma"))); static void -matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, - gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas, +matmul_i16_avx2 (gfc_array_m16 * const restrict retarray, + gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_16 * restrict abase; - const GFC_INTEGER_16 * restrict bbase; - GFC_INTEGER_16 * restrict dest; + const GFC_UINTEGER_16 * restrict abase; + const GFC_UINTEGER_16 * restrict bbase; + GFC_UINTEGER_16 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -701,7 +701,7 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_16)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_16)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -820,7 +820,7 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_16 one = 1, zero = 0; + const GFC_UINTEGER_16 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -858,8 +858,8 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_16 *a, *b; - GFC_INTEGER_16 *c; + const GFC_UINTEGER_16 *a, *b; + GFC_UINTEGER_16 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -867,11 +867,11 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_16 *t1; + GFC_UINTEGER_16 *t1; a = abase; b = bbase; @@ -891,7 +891,7 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_16)0; + c[i + j * c_dim1] = (GFC_UINTEGER_16)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -908,7 +908,7 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_16)); /* Start turning the crank. 
*/ i1 = n; @@ -1126,10 +1126,10 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_16 *restrict abase_x; - const GFC_INTEGER_16 *restrict bbase_y; - GFC_INTEGER_16 *restrict dest_y; - GFC_INTEGER_16 s; + const GFC_UINTEGER_16 *restrict abase_x; + const GFC_UINTEGER_16 *restrict bbase_y; + GFC_UINTEGER_16 *restrict dest_y; + GFC_UINTEGER_16 s; for (y = 0; y < ycount; y++) { @@ -1138,7 +1138,7 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_16) 0; + s = (GFC_UINTEGER_16) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -1147,13 +1147,13 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, } else { - const GFC_INTEGER_16 *restrict bbase_y; - GFC_INTEGER_16 s; + const GFC_UINTEGER_16 *restrict bbase_y; + GFC_UINTEGER_16 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_16) 0; + s = (GFC_UINTEGER_16) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -1162,13 +1162,13 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_16 *restrict bbase_y; - GFC_INTEGER_16 s; + const GFC_UINTEGER_16 *restrict bbase_y; + GFC_UINTEGER_16 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_16) 0; + s = (GFC_UINTEGER_16) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -1178,7 +1178,7 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_16)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -1190,10 +1190,10 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, } else { - const GFC_INTEGER_16 *restrict abase_x; - const GFC_INTEGER_16 *restrict bbase_y; - GFC_INTEGER_16 *restrict dest_y; - GFC_INTEGER_16 s; + const GFC_UINTEGER_16 *restrict abase_x; + const GFC_UINTEGER_16 *restrict bbase_y; + GFC_UINTEGER_16 *restrict dest_y; + GFC_UINTEGER_16 s; for (y = 0; y < ycount; y++) { @@ -1202,7 +1202,7 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_16) 0; + s = (GFC_UINTEGER_16) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; @@ -1218,17 +1218,17 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, #ifdef HAVE_AVX512F static void -matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, - gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas, +matmul_i16_avx512f (gfc_array_m16 * const restrict retarray, + gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx512f"))); static void -matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, - gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas, +matmul_i16_avx512f (gfc_array_m16 * const restrict retarray, + gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_16 * restrict abase; - const GFC_INTEGER_16 * restrict bbase; - GFC_INTEGER_16 * restrict dest; + const GFC_UINTEGER_16 * 
restrict abase; + const GFC_UINTEGER_16 * restrict bbase; + GFC_UINTEGER_16 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -1270,7 +1270,7 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_16)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_16)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -1389,7 +1389,7 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_16 one = 1, zero = 0; + const GFC_UINTEGER_16 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -1427,8 +1427,8 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_16 *a, *b; - GFC_INTEGER_16 *c; + const GFC_UINTEGER_16 *a, *b; + GFC_UINTEGER_16 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -1436,11 +1436,11 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_16 *t1; + GFC_UINTEGER_16 *t1; a = abase; b = bbase; @@ -1460,7 +1460,7 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_16)0; + c[i + j * c_dim1] = (GFC_UINTEGER_16)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -1477,7 +1477,7 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_16)); /* Start turning the crank. 
*/ i1 = n; @@ -1695,10 +1695,10 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_16 *restrict abase_x; - const GFC_INTEGER_16 *restrict bbase_y; - GFC_INTEGER_16 *restrict dest_y; - GFC_INTEGER_16 s; + const GFC_UINTEGER_16 *restrict abase_x; + const GFC_UINTEGER_16 *restrict bbase_y; + GFC_UINTEGER_16 *restrict dest_y; + GFC_UINTEGER_16 s; for (y = 0; y < ycount; y++) { @@ -1707,7 +1707,7 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_16) 0; + s = (GFC_UINTEGER_16) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -1716,13 +1716,13 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, } else { - const GFC_INTEGER_16 *restrict bbase_y; - GFC_INTEGER_16 s; + const GFC_UINTEGER_16 *restrict bbase_y; + GFC_UINTEGER_16 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_16) 0; + s = (GFC_UINTEGER_16) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -1731,13 +1731,13 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_16 *restrict bbase_y; - GFC_INTEGER_16 s; + const GFC_UINTEGER_16 *restrict bbase_y; + GFC_UINTEGER_16 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_16) 0; + s = (GFC_UINTEGER_16) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -1747,7 +1747,7 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_16)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -1759,10 +1759,10 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, } else { - const GFC_INTEGER_16 *restrict abase_x; - const GFC_INTEGER_16 *restrict bbase_y; - GFC_INTEGER_16 *restrict dest_y; - GFC_INTEGER_16 s; + const GFC_UINTEGER_16 *restrict abase_x; + const GFC_UINTEGER_16 *restrict bbase_y; + GFC_UINTEGER_16 *restrict dest_y; + GFC_UINTEGER_16 s; for (y = 0; y < ycount; y++) { @@ -1771,7 +1771,7 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_16) 0; + s = (GFC_UINTEGER_16) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; @@ -1789,29 +1789,29 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, #if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) void -matmul_i16_avx128_fma3 (gfc_array_i16 * const restrict retarray, - gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas, +matmul_i16_avx128_fma3 (gfc_array_m16 * const restrict retarray, + gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); internal_proto(matmul_i16_avx128_fma3); #endif #if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) void -matmul_i16_avx128_fma4 (gfc_array_i16 * const restrict retarray, - gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas, +matmul_i16_avx128_fma4 (gfc_array_m16 * const restrict retarray, + gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int 
try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); internal_proto(matmul_i16_avx128_fma4); #endif /* Function to fall back to if there is no special processor-specific version. */ static void -matmul_i16_vanilla (gfc_array_i16 * const restrict retarray, - gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas, +matmul_i16_vanilla (gfc_array_m16 * const restrict retarray, + gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_16 * restrict abase; - const GFC_INTEGER_16 * restrict bbase; - GFC_INTEGER_16 * restrict dest; + const GFC_UINTEGER_16 * restrict abase; + const GFC_UINTEGER_16 * restrict bbase; + GFC_UINTEGER_16 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -1853,7 +1853,7 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_16)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_16)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -1972,7 +1972,7 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_16 one = 1, zero = 0; + const GFC_UINTEGER_16 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -2010,8 +2010,8 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_16 *a, *b; - GFC_INTEGER_16 *c; + const GFC_UINTEGER_16 *a, *b; + GFC_UINTEGER_16 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -2019,11 +2019,11 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_16 *t1; + GFC_UINTEGER_16 *t1; a = abase; b = bbase; @@ -2043,7 +2043,7 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray, /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_16)0; + c[i + j * c_dim1] = (GFC_UINTEGER_16)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -2060,7 +2060,7 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_16)); /* Start turning the crank. 
*/ i1 = n; @@ -2278,10 +2278,10 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_16 *restrict abase_x; - const GFC_INTEGER_16 *restrict bbase_y; - GFC_INTEGER_16 *restrict dest_y; - GFC_INTEGER_16 s; + const GFC_UINTEGER_16 *restrict abase_x; + const GFC_UINTEGER_16 *restrict bbase_y; + GFC_UINTEGER_16 *restrict dest_y; + GFC_UINTEGER_16 s; for (y = 0; y < ycount; y++) { @@ -2290,7 +2290,7 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_16) 0; + s = (GFC_UINTEGER_16) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -2299,13 +2299,13 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray, } else { - const GFC_INTEGER_16 *restrict bbase_y; - GFC_INTEGER_16 s; + const GFC_UINTEGER_16 *restrict bbase_y; + GFC_UINTEGER_16 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_16) 0; + s = (GFC_UINTEGER_16) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -2314,13 +2314,13 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_16 *restrict bbase_y; - GFC_INTEGER_16 s; + const GFC_UINTEGER_16 *restrict bbase_y; + GFC_UINTEGER_16 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_16) 0; + s = (GFC_UINTEGER_16) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -2330,7 +2330,7 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_16)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -2342,10 +2342,10 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray, } else { - const GFC_INTEGER_16 *restrict abase_x; - const GFC_INTEGER_16 *restrict bbase_y; - GFC_INTEGER_16 *restrict dest_y; - GFC_INTEGER_16 s; + const GFC_UINTEGER_16 *restrict abase_x; + const GFC_UINTEGER_16 *restrict bbase_y; + GFC_UINTEGER_16 *restrict dest_y; + GFC_UINTEGER_16 s; for (y = 0; y < ycount; y++) { @@ -2354,7 +2354,7 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_16) 0; + s = (GFC_UINTEGER_16) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; @@ -2371,16 +2371,16 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray, /* Currently, this is i386 only. Adjust for other architectures. 
*/ -void matmul_i16 (gfc_array_i16 * const restrict retarray, - gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas, +void matmul_i16 (gfc_array_m16 * const restrict retarray, + gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - static void (*matmul_p) (gfc_array_i16 * const restrict retarray, - gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas, + static void (*matmul_p) (gfc_array_m16 * const restrict retarray, + gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas, int blas_limit, blas_call gemm); - void (*matmul_fn) (gfc_array_i16 * const restrict retarray, - gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas, + void (*matmul_fn) (gfc_array_m16 * const restrict retarray, + gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas, int blas_limit, blas_call gemm); matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); @@ -2447,13 +2447,13 @@ void matmul_i16 (gfc_array_i16 * const restrict retarray, #else /* Just the vanilla function. */ void -matmul_i16 (gfc_array_i16 * const restrict retarray, - gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas, +matmul_i16 (gfc_array_m16 * const restrict retarray, + gfc_array_m16 * const restrict a, gfc_array_m16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_16 * restrict abase; - const GFC_INTEGER_16 * restrict bbase; - GFC_INTEGER_16 * restrict dest; + const GFC_UINTEGER_16 * restrict abase; + const GFC_UINTEGER_16 * restrict bbase; + GFC_UINTEGER_16 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -2495,7 +2495,7 @@ matmul_i16 (gfc_array_i16 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_16)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_16)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -2614,7 +2614,7 @@ matmul_i16 (gfc_array_i16 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_16 one = 1, zero = 0; + const GFC_UINTEGER_16 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -2652,8 +2652,8 @@ matmul_i16 (gfc_array_i16 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_16 *a, *b; - GFC_INTEGER_16 *c; + const GFC_UINTEGER_16 *a, *b; + GFC_UINTEGER_16 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -2661,11 +2661,11 @@ matmul_i16 (gfc_array_i16 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_16 *t1; + GFC_UINTEGER_16 *t1; a = abase; b = bbase; @@ -2685,7 +2685,7 @@ matmul_i16 (gfc_array_i16 * const restrict retarray, /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_16)0; + c[i + j * c_dim1] = (GFC_UINTEGER_16)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -2702,7 +2702,7 @@ matmul_i16 (gfc_array_i16 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_16)); /* Start turning the crank. */ i1 = n; @@ -2920,10 +2920,10 @@ matmul_i16 (gfc_array_i16 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_16 *restrict abase_x; - const GFC_INTEGER_16 *restrict bbase_y; - GFC_INTEGER_16 *restrict dest_y; - GFC_INTEGER_16 s; + const GFC_UINTEGER_16 *restrict abase_x; + const GFC_UINTEGER_16 *restrict bbase_y; + GFC_UINTEGER_16 *restrict dest_y; + GFC_UINTEGER_16 s; for (y = 0; y < ycount; y++) { @@ -2932,7 +2932,7 @@ matmul_i16 (gfc_array_i16 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_16) 0; + s = (GFC_UINTEGER_16) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -2941,13 +2941,13 @@ matmul_i16 (gfc_array_i16 * const restrict retarray, } else { - const GFC_INTEGER_16 *restrict bbase_y; - GFC_INTEGER_16 s; + const GFC_UINTEGER_16 *restrict bbase_y; + GFC_UINTEGER_16 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_16) 0; + s = (GFC_UINTEGER_16) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -2956,13 +2956,13 @@ matmul_i16 (gfc_array_i16 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_16 *restrict bbase_y; - GFC_INTEGER_16 s; + const GFC_UINTEGER_16 *restrict bbase_y; + GFC_UINTEGER_16 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_16) 0; + s = (GFC_UINTEGER_16) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -2972,7 +2972,7 @@ matmul_i16 (gfc_array_i16 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_16)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -2984,10 +2984,10 @@ matmul_i16 (gfc_array_i16 * const restrict retarray, } else { - const GFC_INTEGER_16 *restrict abase_x; - const GFC_INTEGER_16 *restrict bbase_y; - GFC_INTEGER_16 *restrict dest_y; - GFC_INTEGER_16 s; + const GFC_UINTEGER_16 *restrict abase_x; + const GFC_UINTEGER_16 *restrict bbase_y; + GFC_UINTEGER_16 *restrict dest_y; + GFC_UINTEGER_16 s; for (y = 0; y < ycount; y++) { @@ -2996,7 +2996,7 @@ matmul_i16 (gfc_array_i16 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_16) 0; + s = (GFC_UINTEGER_16) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; diff --git a/libgfortran/generated/matmul_i2.c b/libgfortran/generated/matmul_i2.c index 1b727e46588b..89e326e6be5e 100644 --- a/libgfortran/generated/matmul_i2.c +++ b/libgfortran/generated/matmul_i2.c @@ -28,17 +28,17 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #include -#if defined (HAVE_GFC_INTEGER_2) +#if defined (HAVE_GFC_UINTEGER_2) /* Prototype for the BLAS ?gemm subroutine, a pointer to which can be passed to us by the front-end, in which case we call it for large matrices. 
*/ typedef void (*blas_call)(const char *, const char *, const int *, const int *, - const int *, const GFC_INTEGER_2 *, const GFC_INTEGER_2 *, - const int *, const GFC_INTEGER_2 *, const int *, - const GFC_INTEGER_2 *, GFC_INTEGER_2 *, const int *, - int, int); + const int *, const GFC_UINTEGER_2 *, const GFC_UINTEGER_2 *, + const int *, const GFC_UINTEGER_2 *, const int *, + const GFC_UINTEGER_2 *, GFC_UINTEGER_2 *, const int *, + int, int); /* The order of loops is different in the case of plain matrix multiplication C=MATMUL(A,B), and in the frequent special case where @@ -69,8 +69,8 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *, see if there is a way to perform the matrix multiplication by a call to the BLAS gemm function. */ -extern void matmul_i2 (gfc_array_i2 * const restrict retarray, - gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas, +extern void matmul_i2 (gfc_array_m2 * const restrict retarray, + gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas, int blas_limit, blas_call gemm); export_proto(matmul_i2); @@ -80,17 +80,17 @@ export_proto(matmul_i2); #ifdef HAVE_AVX static void -matmul_i2_avx (gfc_array_i2 * const restrict retarray, - gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas, +matmul_i2_avx (gfc_array_m2 * const restrict retarray, + gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx"))); static void -matmul_i2_avx (gfc_array_i2 * const restrict retarray, - gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas, +matmul_i2_avx (gfc_array_m2 * const restrict retarray, + gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_2 * restrict abase; - const GFC_INTEGER_2 * restrict bbase; - GFC_INTEGER_2 * restrict dest; + const GFC_UINTEGER_2 * restrict abase; + const GFC_UINTEGER_2 * restrict bbase; + GFC_UINTEGER_2 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -132,7 +132,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_2)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -251,7 +251,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_2 one = 1, zero = 0; + const GFC_UINTEGER_2 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -289,8 +289,8 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. 
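   The change from GFC_INTEGER_2 to GFC_UINTEGER_2 is not cosmetic in
   this kernel: the accumulations below can overflow, and unsigned
   arithmetic wraps modulo 2**16, matching the modular semantics of
   UNSIGNED, whereas signed overflow would be undefined behaviour in C.
   A small standalone illustration, assuming GFC_UINTEGER_2 maps to
   uint16_t as it does on typical targets:

      #include <stdint.h>
      #include <stdio.h>

      int
      main (void)
      {
        uint16_t a = 60000, b = 3;
        // a*b promotes to int (180000, no overflow there); the
        // conversion back to uint16_t reduces it modulo 65536.
        uint16_t s = (uint16_t) (a * b);
        printf ("%u\n", (unsigned) s);   // prints 48928
        return 0;
      }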
*/ - const GFC_INTEGER_2 *a, *b; - GFC_INTEGER_2 *c; + const GFC_UINTEGER_2 *a, *b; + GFC_UINTEGER_2 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -298,11 +298,11 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_2 *t1; + GFC_UINTEGER_2 *t1; a = abase; b = bbase; @@ -322,7 +322,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray, /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_2)0; + c[i + j * c_dim1] = (GFC_UINTEGER_2)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -339,7 +339,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_2)); /* Start turning the crank. */ i1 = n; @@ -557,10 +557,10 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_2 *restrict abase_x; - const GFC_INTEGER_2 *restrict bbase_y; - GFC_INTEGER_2 *restrict dest_y; - GFC_INTEGER_2 s; + const GFC_UINTEGER_2 *restrict abase_x; + const GFC_UINTEGER_2 *restrict bbase_y; + GFC_UINTEGER_2 *restrict dest_y; + GFC_UINTEGER_2 s; for (y = 0; y < ycount; y++) { @@ -569,7 +569,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_2) 0; + s = (GFC_UINTEGER_2) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -578,13 +578,13 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray, } else { - const GFC_INTEGER_2 *restrict bbase_y; - GFC_INTEGER_2 s; + const GFC_UINTEGER_2 *restrict bbase_y; + GFC_UINTEGER_2 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_2) 0; + s = (GFC_UINTEGER_2) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -593,13 +593,13 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_2 *restrict bbase_y; - GFC_INTEGER_2 s; + const GFC_UINTEGER_2 *restrict bbase_y; + GFC_UINTEGER_2 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_2) 0; + s = (GFC_UINTEGER_2) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -609,7 +609,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_2)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -621,10 +621,10 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray, } else { - const GFC_INTEGER_2 *restrict abase_x; - const GFC_INTEGER_2 *restrict bbase_y; - GFC_INTEGER_2 *restrict dest_y; - GFC_INTEGER_2 s; + const GFC_UINTEGER_2 *restrict abase_x; + const GFC_UINTEGER_2 *restrict bbase_y; + GFC_UINTEGER_2 *restrict dest_y; + GFC_UINTEGER_2 s; for (y = 0; y < ycount; y++) { @@ -633,7 +633,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_2) 0; + s = (GFC_UINTEGER_2) 0; for 
(n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; @@ -649,17 +649,17 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray, #ifdef HAVE_AVX2 static void -matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, - gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas, +matmul_i2_avx2 (gfc_array_m2 * const restrict retarray, + gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma"))); static void -matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, - gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas, +matmul_i2_avx2 (gfc_array_m2 * const restrict retarray, + gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_2 * restrict abase; - const GFC_INTEGER_2 * restrict bbase; - GFC_INTEGER_2 * restrict dest; + const GFC_UINTEGER_2 * restrict abase; + const GFC_UINTEGER_2 * restrict bbase; + GFC_UINTEGER_2 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -701,7 +701,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_2)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -820,7 +820,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_2 one = 1, zero = 0; + const GFC_UINTEGER_2 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -858,8 +858,8 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_2 *a, *b; - GFC_INTEGER_2 *c; + const GFC_UINTEGER_2 *a, *b; + GFC_UINTEGER_2 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -867,11 +867,11 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_2 *t1; + GFC_UINTEGER_2 *t1; a = abase; b = bbase; @@ -891,7 +891,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_2)0; + c[i + j * c_dim1] = (GFC_UINTEGER_2)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -908,7 +908,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_2)); /* Start turning the crank. 
*/ i1 = n; @@ -1126,10 +1126,10 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_2 *restrict abase_x; - const GFC_INTEGER_2 *restrict bbase_y; - GFC_INTEGER_2 *restrict dest_y; - GFC_INTEGER_2 s; + const GFC_UINTEGER_2 *restrict abase_x; + const GFC_UINTEGER_2 *restrict bbase_y; + GFC_UINTEGER_2 *restrict dest_y; + GFC_UINTEGER_2 s; for (y = 0; y < ycount; y++) { @@ -1138,7 +1138,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_2) 0; + s = (GFC_UINTEGER_2) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -1147,13 +1147,13 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, } else { - const GFC_INTEGER_2 *restrict bbase_y; - GFC_INTEGER_2 s; + const GFC_UINTEGER_2 *restrict bbase_y; + GFC_UINTEGER_2 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_2) 0; + s = (GFC_UINTEGER_2) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -1162,13 +1162,13 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_2 *restrict bbase_y; - GFC_INTEGER_2 s; + const GFC_UINTEGER_2 *restrict bbase_y; + GFC_UINTEGER_2 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_2) 0; + s = (GFC_UINTEGER_2) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -1178,7 +1178,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_2)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -1190,10 +1190,10 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, } else { - const GFC_INTEGER_2 *restrict abase_x; - const GFC_INTEGER_2 *restrict bbase_y; - GFC_INTEGER_2 *restrict dest_y; - GFC_INTEGER_2 s; + const GFC_UINTEGER_2 *restrict abase_x; + const GFC_UINTEGER_2 *restrict bbase_y; + GFC_UINTEGER_2 *restrict dest_y; + GFC_UINTEGER_2 s; for (y = 0; y < ycount; y++) { @@ -1202,7 +1202,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_2) 0; + s = (GFC_UINTEGER_2) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; @@ -1218,17 +1218,17 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, #ifdef HAVE_AVX512F static void -matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, - gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas, +matmul_i2_avx512f (gfc_array_m2 * const restrict retarray, + gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx512f"))); static void -matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, - gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas, +matmul_i2_avx512f (gfc_array_m2 * const restrict retarray, + gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_2 * restrict abase; - const GFC_INTEGER_2 * restrict bbase; - GFC_INTEGER_2 * restrict dest; + const GFC_UINTEGER_2 * restrict abase; + const GFC_UINTEGER_2 * restrict bbase; + GFC_UINTEGER_2 
* restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -1270,7 +1270,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_2)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -1389,7 +1389,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_2 one = 1, zero = 0; + const GFC_UINTEGER_2 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -1427,8 +1427,8 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_2 *a, *b; - GFC_INTEGER_2 *c; + const GFC_UINTEGER_2 *a, *b; + GFC_UINTEGER_2 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -1436,11 +1436,11 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_2 *t1; + GFC_UINTEGER_2 *t1; a = abase; b = bbase; @@ -1460,7 +1460,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_2)0; + c[i + j * c_dim1] = (GFC_UINTEGER_2)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -1477,7 +1477,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_2)); /* Start turning the crank. 
*/ i1 = n; @@ -1695,10 +1695,10 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_2 *restrict abase_x; - const GFC_INTEGER_2 *restrict bbase_y; - GFC_INTEGER_2 *restrict dest_y; - GFC_INTEGER_2 s; + const GFC_UINTEGER_2 *restrict abase_x; + const GFC_UINTEGER_2 *restrict bbase_y; + GFC_UINTEGER_2 *restrict dest_y; + GFC_UINTEGER_2 s; for (y = 0; y < ycount; y++) { @@ -1707,7 +1707,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_2) 0; + s = (GFC_UINTEGER_2) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -1716,13 +1716,13 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, } else { - const GFC_INTEGER_2 *restrict bbase_y; - GFC_INTEGER_2 s; + const GFC_UINTEGER_2 *restrict bbase_y; + GFC_UINTEGER_2 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_2) 0; + s = (GFC_UINTEGER_2) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -1731,13 +1731,13 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_2 *restrict bbase_y; - GFC_INTEGER_2 s; + const GFC_UINTEGER_2 *restrict bbase_y; + GFC_UINTEGER_2 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_2) 0; + s = (GFC_UINTEGER_2) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -1747,7 +1747,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_2)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -1759,10 +1759,10 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, } else { - const GFC_INTEGER_2 *restrict abase_x; - const GFC_INTEGER_2 *restrict bbase_y; - GFC_INTEGER_2 *restrict dest_y; - GFC_INTEGER_2 s; + const GFC_UINTEGER_2 *restrict abase_x; + const GFC_UINTEGER_2 *restrict bbase_y; + GFC_UINTEGER_2 *restrict dest_y; + GFC_UINTEGER_2 s; for (y = 0; y < ycount; y++) { @@ -1771,7 +1771,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_2) 0; + s = (GFC_UINTEGER_2) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; @@ -1789,29 +1789,29 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, #if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) void -matmul_i2_avx128_fma3 (gfc_array_i2 * const restrict retarray, - gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas, +matmul_i2_avx128_fma3 (gfc_array_m2 * const restrict retarray, + gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); internal_proto(matmul_i2_avx128_fma3); #endif #if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) void -matmul_i2_avx128_fma4 (gfc_array_i2 * const restrict retarray, - gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas, +matmul_i2_avx128_fma4 (gfc_array_m2 * const restrict retarray, + gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas, int blas_limit, blas_call gemm) 
__attribute__((__target__("avx,fma4"))); internal_proto(matmul_i2_avx128_fma4); #endif /* Function to fall back to if there is no special processor-specific version. */ static void -matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, - gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas, +matmul_i2_vanilla (gfc_array_m2 * const restrict retarray, + gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_2 * restrict abase; - const GFC_INTEGER_2 * restrict bbase; - GFC_INTEGER_2 * restrict dest; + const GFC_UINTEGER_2 * restrict abase; + const GFC_UINTEGER_2 * restrict bbase; + GFC_UINTEGER_2 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -1853,7 +1853,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_2)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -1972,7 +1972,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_2 one = 1, zero = 0; + const GFC_UINTEGER_2 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -2010,8 +2010,8 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_2 *a, *b; - GFC_INTEGER_2 *c; + const GFC_UINTEGER_2 *a, *b; + GFC_UINTEGER_2 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -2019,11 +2019,11 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_2 *t1; + GFC_UINTEGER_2 *t1; a = abase; b = bbase; @@ -2043,7 +2043,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_2)0; + c[i + j * c_dim1] = (GFC_UINTEGER_2)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -2060,7 +2060,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_2)); /* Start turning the crank. 
*/ i1 = n; @@ -2278,10 +2278,10 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_2 *restrict abase_x; - const GFC_INTEGER_2 *restrict bbase_y; - GFC_INTEGER_2 *restrict dest_y; - GFC_INTEGER_2 s; + const GFC_UINTEGER_2 *restrict abase_x; + const GFC_UINTEGER_2 *restrict bbase_y; + GFC_UINTEGER_2 *restrict dest_y; + GFC_UINTEGER_2 s; for (y = 0; y < ycount; y++) { @@ -2290,7 +2290,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_2) 0; + s = (GFC_UINTEGER_2) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -2299,13 +2299,13 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, } else { - const GFC_INTEGER_2 *restrict bbase_y; - GFC_INTEGER_2 s; + const GFC_UINTEGER_2 *restrict bbase_y; + GFC_UINTEGER_2 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_2) 0; + s = (GFC_UINTEGER_2) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -2314,13 +2314,13 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_2 *restrict bbase_y; - GFC_INTEGER_2 s; + const GFC_UINTEGER_2 *restrict bbase_y; + GFC_UINTEGER_2 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_2) 0; + s = (GFC_UINTEGER_2) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -2330,7 +2330,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_2)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -2342,10 +2342,10 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, } else { - const GFC_INTEGER_2 *restrict abase_x; - const GFC_INTEGER_2 *restrict bbase_y; - GFC_INTEGER_2 *restrict dest_y; - GFC_INTEGER_2 s; + const GFC_UINTEGER_2 *restrict abase_x; + const GFC_UINTEGER_2 *restrict bbase_y; + GFC_UINTEGER_2 *restrict dest_y; + GFC_UINTEGER_2 s; for (y = 0; y < ycount; y++) { @@ -2354,7 +2354,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_2) 0; + s = (GFC_UINTEGER_2) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; @@ -2371,16 +2371,16 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, /* Currently, this is i386 only. Adjust for other architectures. 
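
The wrapper that follows caches its choice of kernel in a function pointer: the first call probes the CPU, publishes the best matching variant with a relaxed atomic store, and every later call just loads the pointer and jumps. A minimal sketch of that pattern, with a hypothetical have_avx2() probe standing in for the __builtin_cpu_supports logic in the generated code:

#include <stdatomic.h>

typedef void (*kernel_fn) (void);

static void kernel_vanilla (void) { /* portable fallback */ }
static void kernel_avx2 (void)    { /* same loops, built for AVX2 */ }

/* Hypothetical feature probe, for illustration only.  */
static int have_avx2 (void) { return 0; }

/* Cached choice, shared by all callers.  */
static _Atomic (kernel_fn) kernel_p;

static void
run_kernel (void)
{
  kernel_fn fn = atomic_load_explicit (&kernel_p, memory_order_relaxed);
  if (fn == NULL)
    {
      /* First call: probe once, then publish.  A racing thread may
         probe again, but both threads store the same pointer, so the
         race is benign; this is why relaxed ordering suffices.  */
      fn = have_avx2 () ? kernel_avx2 : kernel_vanilla;
      atomic_store_explicit (&kernel_p, fn, memory_order_relaxed);
    }
  fn ();
}
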
*/ -void matmul_i2 (gfc_array_i2 * const restrict retarray, - gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas, +void matmul_i2 (gfc_array_m2 * const restrict retarray, + gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - static void (*matmul_p) (gfc_array_i2 * const restrict retarray, - gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas, + static void (*matmul_p) (gfc_array_m2 * const restrict retarray, + gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas, int blas_limit, blas_call gemm); - void (*matmul_fn) (gfc_array_i2 * const restrict retarray, - gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas, + void (*matmul_fn) (gfc_array_m2 * const restrict retarray, + gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas, int blas_limit, blas_call gemm); matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); @@ -2447,13 +2447,13 @@ void matmul_i2 (gfc_array_i2 * const restrict retarray, #else /* Just the vanilla function. */ void -matmul_i2 (gfc_array_i2 * const restrict retarray, - gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas, +matmul_i2 (gfc_array_m2 * const restrict retarray, + gfc_array_m2 * const restrict a, gfc_array_m2 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_2 * restrict abase; - const GFC_INTEGER_2 * restrict bbase; - GFC_INTEGER_2 * restrict dest; + const GFC_UINTEGER_2 * restrict abase; + const GFC_UINTEGER_2 * restrict bbase; + GFC_UINTEGER_2 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -2495,7 +2495,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_2)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -2614,7 +2614,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_2 one = 1, zero = 0; + const GFC_UINTEGER_2 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -2652,8 +2652,8 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_2 *a, *b; - GFC_INTEGER_2 *c; + const GFC_UINTEGER_2 *a, *b; + GFC_UINTEGER_2 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -2661,11 +2661,11 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_2 *t1; + GFC_UINTEGER_2 *t1; a = abase; b = bbase; @@ -2685,7 +2685,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_2)0; + c[i + j * c_dim1] = (GFC_UINTEGER_2)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -2702,7 +2702,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_2)); /* Start turning the crank. */ i1 = n; @@ -2920,10 +2920,10 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_2 *restrict abase_x; - const GFC_INTEGER_2 *restrict bbase_y; - GFC_INTEGER_2 *restrict dest_y; - GFC_INTEGER_2 s; + const GFC_UINTEGER_2 *restrict abase_x; + const GFC_UINTEGER_2 *restrict bbase_y; + GFC_UINTEGER_2 *restrict dest_y; + GFC_UINTEGER_2 s; for (y = 0; y < ycount; y++) { @@ -2932,7 +2932,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_2) 0; + s = (GFC_UINTEGER_2) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -2941,13 +2941,13 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, } else { - const GFC_INTEGER_2 *restrict bbase_y; - GFC_INTEGER_2 s; + const GFC_UINTEGER_2 *restrict bbase_y; + GFC_UINTEGER_2 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_2) 0; + s = (GFC_UINTEGER_2) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -2956,13 +2956,13 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_2 *restrict bbase_y; - GFC_INTEGER_2 s; + const GFC_UINTEGER_2 *restrict bbase_y; + GFC_UINTEGER_2 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_2) 0; + s = (GFC_UINTEGER_2) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -2972,7 +2972,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_2)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -2984,10 +2984,10 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, } else { - const GFC_INTEGER_2 *restrict abase_x; - const GFC_INTEGER_2 *restrict bbase_y; - GFC_INTEGER_2 *restrict dest_y; - GFC_INTEGER_2 s; + const GFC_UINTEGER_2 *restrict abase_x; + const GFC_UINTEGER_2 *restrict bbase_y; + GFC_UINTEGER_2 *restrict dest_y; + GFC_UINTEGER_2 s; for (y = 0; y < ycount; y++) { @@ -2996,7 +2996,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_2) 0; + s = (GFC_UINTEGER_2) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; diff --git a/libgfortran/generated/matmul_i4.c b/libgfortran/generated/matmul_i4.c index ba421d72c356..2601f6453b71 100644 --- a/libgfortran/generated/matmul_i4.c +++ b/libgfortran/generated/matmul_i4.c @@ -28,17 +28,17 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #include -#if defined (HAVE_GFC_INTEGER_4) +#if defined (HAVE_GFC_UINTEGER_4) /* Prototype for the BLAS ?gemm subroutine, a pointer to which can be passed to us by the front-end, in which case we call it for large matrices. 
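
The blas_call typedef that follows mirrors a Fortran-convention ?gemm: the transpose flags are character strings, every numeric argument is passed by reference, and two hidden string lengths trail the argument list. Assuming a conforming function pointer, a plain C = A*B call would look roughly like this (the helper and its argument names are illustrative, not part of the patch):

/* Invoke a Fortran-style gemm through a blas_call pointer.  "N" means
   no transpose; alpha = 1 and beta = 0 request plain C = A*B; the two
   trailing 1s are the hidden lengths of the "N" strings.  */
static void
call_gemm_sketch (blas_call gemm, int m, int n, int k,
                  const GFC_UINTEGER_4 *a, int lda,
                  const GFC_UINTEGER_4 *b, int ldb,
                  GFC_UINTEGER_4 *c, int ldc)
{
  const GFC_UINTEGER_4 one = 1, zero = 0;
  gemm ("N", "N", &m, &n, &k, &one, a, &lda, b, &ldb, &zero,
        c, &ldc, 1, 1);
}

This matches the calling sequence the generated code uses when try_blas is set and the matrix sizes clear blas_limit.
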
*/ typedef void (*blas_call)(const char *, const char *, const int *, const int *, - const int *, const GFC_INTEGER_4 *, const GFC_INTEGER_4 *, - const int *, const GFC_INTEGER_4 *, const int *, - const GFC_INTEGER_4 *, GFC_INTEGER_4 *, const int *, - int, int); + const int *, const GFC_UINTEGER_4 *, const GFC_UINTEGER_4 *, + const int *, const GFC_UINTEGER_4 *, const int *, + const GFC_UINTEGER_4 *, GFC_UINTEGER_4 *, const int *, + int, int); /* The order of loops is different in the case of plain matrix multiplication C=MATMUL(A,B), and in the frequent special case where @@ -69,8 +69,8 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *, see if there is a way to perform the matrix multiplication by a call to the BLAS gemm function. */ -extern void matmul_i4 (gfc_array_i4 * const restrict retarray, - gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas, +extern void matmul_i4 (gfc_array_m4 * const restrict retarray, + gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas, int blas_limit, blas_call gemm); export_proto(matmul_i4); @@ -80,17 +80,17 @@ export_proto(matmul_i4); #ifdef HAVE_AVX static void -matmul_i4_avx (gfc_array_i4 * const restrict retarray, - gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas, +matmul_i4_avx (gfc_array_m4 * const restrict retarray, + gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx"))); static void -matmul_i4_avx (gfc_array_i4 * const restrict retarray, - gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas, +matmul_i4_avx (gfc_array_m4 * const restrict retarray, + gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_4 * restrict abase; - const GFC_INTEGER_4 * restrict bbase; - GFC_INTEGER_4 * restrict dest; + const GFC_UINTEGER_4 * restrict abase; + const GFC_UINTEGER_4 * restrict bbase; + GFC_UINTEGER_4 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -132,7 +132,7 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_4)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_4)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -251,7 +251,7 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_4 one = 1, zero = 0; + const GFC_UINTEGER_4 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -289,8 +289,8 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. 
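
The f11 through f44 locals declared just below form a 4x4 tile of register accumulators: the innermost kernel computes sixteen partial sums at once, so every element of a and b it loads is reused four times before leaving the registers. Shrunk to a 2x2 tile and stripped of the edge handling the real code performs, the idea looks like this (all names here are illustrative):

/* 2x2 register-tiling sketch for column-major C += A*B.  m, n and k
   are assumed even for brevity; ldc/lda/ldb are leading dimensions.  */
static void
tile_2x2_sketch (GFC_UINTEGER_4 *c, const GFC_UINTEGER_4 *a,
                 const GFC_UINTEGER_4 *b, int m, int n, int k,
                 int ldc, int lda, int ldb)
{
  for (int j = 0; j < n; j += 2)
    for (int i = 0; i < m; i += 2)
      {
        GFC_UINTEGER_4 f11 = 0, f21 = 0, f12 = 0, f22 = 0;
        for (int l = 0; l < k; l++)
          {
            /* Two loads from a and two from b feed four mul-adds.  */
            GFC_UINTEGER_4 a1 = a[i + l * lda], a2 = a[i + 1 + l * lda];
            GFC_UINTEGER_4 b1 = b[l + j * ldb], b2 = b[l + (j + 1) * ldb];
            f11 += a1 * b1;  f21 += a2 * b1;
            f12 += a1 * b2;  f22 += a2 * b2;
          }
        c[i + j * ldc] += f11;
        c[i + 1 + j * ldc] += f21;
        c[i + (j + 1) * ldc] += f12;
        c[i + 1 + (j + 1) * ldc] += f22;
      }
}
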
*/ - const GFC_INTEGER_4 *a, *b; - GFC_INTEGER_4 *c; + const GFC_UINTEGER_4 *a, *b; + GFC_UINTEGER_4 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -298,11 +298,11 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_4 *t1; + GFC_UINTEGER_4 *t1; a = abase; b = bbase; @@ -322,7 +322,7 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray, /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_4)0; + c[i + j * c_dim1] = (GFC_UINTEGER_4)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -339,7 +339,7 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_4)); /* Start turning the crank. */ i1 = n; @@ -557,10 +557,10 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_4 *restrict abase_x; - const GFC_INTEGER_4 *restrict bbase_y; - GFC_INTEGER_4 *restrict dest_y; - GFC_INTEGER_4 s; + const GFC_UINTEGER_4 *restrict abase_x; + const GFC_UINTEGER_4 *restrict bbase_y; + GFC_UINTEGER_4 *restrict dest_y; + GFC_UINTEGER_4 s; for (y = 0; y < ycount; y++) { @@ -569,7 +569,7 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_4) 0; + s = (GFC_UINTEGER_4) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -578,13 +578,13 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray, } else { - const GFC_INTEGER_4 *restrict bbase_y; - GFC_INTEGER_4 s; + const GFC_UINTEGER_4 *restrict bbase_y; + GFC_UINTEGER_4 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_4) 0; + s = (GFC_UINTEGER_4) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -593,13 +593,13 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_4 *restrict bbase_y; - GFC_INTEGER_4 s; + const GFC_UINTEGER_4 *restrict bbase_y; + GFC_UINTEGER_4 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_4) 0; + s = (GFC_UINTEGER_4) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -609,7 +609,7 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_4)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -621,10 +621,10 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray, } else { - const GFC_INTEGER_4 *restrict abase_x; - const GFC_INTEGER_4 *restrict bbase_y; - GFC_INTEGER_4 *restrict dest_y; - GFC_INTEGER_4 s; + const GFC_UINTEGER_4 *restrict abase_x; + const GFC_UINTEGER_4 *restrict bbase_y; + GFC_UINTEGER_4 *restrict dest_y; + GFC_UINTEGER_4 s; for (y = 0; y < ycount; y++) { @@ -633,7 +633,7 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_4) 0; + s = (GFC_UINTEGER_4) 0; for 
(n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; @@ -649,17 +649,17 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray, #ifdef HAVE_AVX2 static void -matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, - gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas, +matmul_i4_avx2 (gfc_array_m4 * const restrict retarray, + gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma"))); static void -matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, - gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas, +matmul_i4_avx2 (gfc_array_m4 * const restrict retarray, + gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_4 * restrict abase; - const GFC_INTEGER_4 * restrict bbase; - GFC_INTEGER_4 * restrict dest; + const GFC_UINTEGER_4 * restrict abase; + const GFC_UINTEGER_4 * restrict bbase; + GFC_UINTEGER_4 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -701,7 +701,7 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_4)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_4)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -820,7 +820,7 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_4 one = 1, zero = 0; + const GFC_UINTEGER_4 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -858,8 +858,8 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_4 *a, *b; - GFC_INTEGER_4 *c; + const GFC_UINTEGER_4 *a, *b; + GFC_UINTEGER_4 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -867,11 +867,11 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_4 *t1; + GFC_UINTEGER_4 *t1; a = abase; b = bbase; @@ -891,7 +891,7 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_4)0; + c[i + j * c_dim1] = (GFC_UINTEGER_4)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -908,7 +908,7 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_4)); /* Start turning the crank. 
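
Just above, the scratch buffer t1 is capped at 65536 elements, i.e. at most 65536 * 4 = 256 KiB for the 4-byte kind in this file, so the packed block of a stays cache-resident however large the full matrix is. The packing step amounts to copying a sub-block into contiguous storage once and then streaming over it repeatedly; roughly (a sketch, not the patch's exact indexing):

/* Pack an isec x lsec block of column-major a, starting at (ii, ll),
   into contiguous t1.  The copy is paid once; the block is then read
   once per column tile of b, with unit stride.  */
static void
pack_block_sketch (GFC_UINTEGER_4 *t1, const GFC_UINTEGER_4 *a,
                   int lda, int ii, int ll, int isec, int lsec)
{
  for (int l = 0; l < lsec; l++)
    for (int i = 0; i < isec; i++)
      t1[i + l * isec] = a[(ii + i) + (ll + l) * lda];
}
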
*/ i1 = n; @@ -1126,10 +1126,10 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_4 *restrict abase_x; - const GFC_INTEGER_4 *restrict bbase_y; - GFC_INTEGER_4 *restrict dest_y; - GFC_INTEGER_4 s; + const GFC_UINTEGER_4 *restrict abase_x; + const GFC_UINTEGER_4 *restrict bbase_y; + GFC_UINTEGER_4 *restrict dest_y; + GFC_UINTEGER_4 s; for (y = 0; y < ycount; y++) { @@ -1138,7 +1138,7 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_4) 0; + s = (GFC_UINTEGER_4) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -1147,13 +1147,13 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, } else { - const GFC_INTEGER_4 *restrict bbase_y; - GFC_INTEGER_4 s; + const GFC_UINTEGER_4 *restrict bbase_y; + GFC_UINTEGER_4 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_4) 0; + s = (GFC_UINTEGER_4) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -1162,13 +1162,13 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_4 *restrict bbase_y; - GFC_INTEGER_4 s; + const GFC_UINTEGER_4 *restrict bbase_y; + GFC_UINTEGER_4 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_4) 0; + s = (GFC_UINTEGER_4) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -1178,7 +1178,7 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_4)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -1190,10 +1190,10 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, } else { - const GFC_INTEGER_4 *restrict abase_x; - const GFC_INTEGER_4 *restrict bbase_y; - GFC_INTEGER_4 *restrict dest_y; - GFC_INTEGER_4 s; + const GFC_UINTEGER_4 *restrict abase_x; + const GFC_UINTEGER_4 *restrict bbase_y; + GFC_UINTEGER_4 *restrict dest_y; + GFC_UINTEGER_4 s; for (y = 0; y < ycount; y++) { @@ -1202,7 +1202,7 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_4) 0; + s = (GFC_UINTEGER_4) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; @@ -1218,17 +1218,17 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, #ifdef HAVE_AVX512F static void -matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, - gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas, +matmul_i4_avx512f (gfc_array_m4 * const restrict retarray, + gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx512f"))); static void -matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, - gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas, +matmul_i4_avx512f (gfc_array_m4 * const restrict retarray, + gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_4 * restrict abase; - const GFC_INTEGER_4 * restrict bbase; - GFC_INTEGER_4 * restrict dest; + const GFC_UINTEGER_4 * restrict abase; + const GFC_UINTEGER_4 * restrict bbase; + GFC_UINTEGER_4 
* restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -1270,7 +1270,7 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_4)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_4)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -1389,7 +1389,7 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_4 one = 1, zero = 0; + const GFC_UINTEGER_4 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -1427,8 +1427,8 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_4 *a, *b; - GFC_INTEGER_4 *c; + const GFC_UINTEGER_4 *a, *b; + GFC_UINTEGER_4 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -1436,11 +1436,11 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_4 *t1; + GFC_UINTEGER_4 *t1; a = abase; b = bbase; @@ -1460,7 +1460,7 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_4)0; + c[i + j * c_dim1] = (GFC_UINTEGER_4)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -1477,7 +1477,7 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_4)); /* Start turning the crank. 
*/ i1 = n; @@ -1695,10 +1695,10 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_4 *restrict abase_x; - const GFC_INTEGER_4 *restrict bbase_y; - GFC_INTEGER_4 *restrict dest_y; - GFC_INTEGER_4 s; + const GFC_UINTEGER_4 *restrict abase_x; + const GFC_UINTEGER_4 *restrict bbase_y; + GFC_UINTEGER_4 *restrict dest_y; + GFC_UINTEGER_4 s; for (y = 0; y < ycount; y++) { @@ -1707,7 +1707,7 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_4) 0; + s = (GFC_UINTEGER_4) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -1716,13 +1716,13 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, } else { - const GFC_INTEGER_4 *restrict bbase_y; - GFC_INTEGER_4 s; + const GFC_UINTEGER_4 *restrict bbase_y; + GFC_UINTEGER_4 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_4) 0; + s = (GFC_UINTEGER_4) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -1731,13 +1731,13 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_4 *restrict bbase_y; - GFC_INTEGER_4 s; + const GFC_UINTEGER_4 *restrict bbase_y; + GFC_UINTEGER_4 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_4) 0; + s = (GFC_UINTEGER_4) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -1747,7 +1747,7 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_4)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -1759,10 +1759,10 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, } else { - const GFC_INTEGER_4 *restrict abase_x; - const GFC_INTEGER_4 *restrict bbase_y; - GFC_INTEGER_4 *restrict dest_y; - GFC_INTEGER_4 s; + const GFC_UINTEGER_4 *restrict abase_x; + const GFC_UINTEGER_4 *restrict bbase_y; + GFC_UINTEGER_4 *restrict dest_y; + GFC_UINTEGER_4 s; for (y = 0; y < ycount; y++) { @@ -1771,7 +1771,7 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_4) 0; + s = (GFC_UINTEGER_4) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; @@ -1789,29 +1789,29 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, #if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) void -matmul_i4_avx128_fma3 (gfc_array_i4 * const restrict retarray, - gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas, +matmul_i4_avx128_fma3 (gfc_array_m4 * const restrict retarray, + gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); internal_proto(matmul_i4_avx128_fma3); #endif #if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) void -matmul_i4_avx128_fma4 (gfc_array_i4 * const restrict retarray, - gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas, +matmul_i4_avx128_fma4 (gfc_array_m4 * const restrict retarray, + gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) 
__attribute__((__target__("avx,fma4"))); internal_proto(matmul_i4_avx128_fma4); #endif /* Function to fall back to if there is no special processor-specific version. */ static void -matmul_i4_vanilla (gfc_array_i4 * const restrict retarray, - gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas, +matmul_i4_vanilla (gfc_array_m4 * const restrict retarray, + gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_4 * restrict abase; - const GFC_INTEGER_4 * restrict bbase; - GFC_INTEGER_4 * restrict dest; + const GFC_UINTEGER_4 * restrict abase; + const GFC_UINTEGER_4 * restrict bbase; + GFC_UINTEGER_4 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -1853,7 +1853,7 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_4)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_4)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -1972,7 +1972,7 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_4 one = 1, zero = 0; + const GFC_UINTEGER_4 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -2010,8 +2010,8 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_4 *a, *b; - GFC_INTEGER_4 *c; + const GFC_UINTEGER_4 *a, *b; + GFC_UINTEGER_4 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -2019,11 +2019,11 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_4 *t1; + GFC_UINTEGER_4 *t1; a = abase; b = bbase; @@ -2043,7 +2043,7 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray, /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_4)0; + c[i + j * c_dim1] = (GFC_UINTEGER_4)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -2060,7 +2060,7 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_4)); /* Start turning the crank. 
*/ i1 = n; @@ -2278,10 +2278,10 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_4 *restrict abase_x; - const GFC_INTEGER_4 *restrict bbase_y; - GFC_INTEGER_4 *restrict dest_y; - GFC_INTEGER_4 s; + const GFC_UINTEGER_4 *restrict abase_x; + const GFC_UINTEGER_4 *restrict bbase_y; + GFC_UINTEGER_4 *restrict dest_y; + GFC_UINTEGER_4 s; for (y = 0; y < ycount; y++) { @@ -2290,7 +2290,7 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_4) 0; + s = (GFC_UINTEGER_4) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -2299,13 +2299,13 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray, } else { - const GFC_INTEGER_4 *restrict bbase_y; - GFC_INTEGER_4 s; + const GFC_UINTEGER_4 *restrict bbase_y; + GFC_UINTEGER_4 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_4) 0; + s = (GFC_UINTEGER_4) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -2314,13 +2314,13 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_4 *restrict bbase_y; - GFC_INTEGER_4 s; + const GFC_UINTEGER_4 *restrict bbase_y; + GFC_UINTEGER_4 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_4) 0; + s = (GFC_UINTEGER_4) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -2330,7 +2330,7 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_4)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -2342,10 +2342,10 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray, } else { - const GFC_INTEGER_4 *restrict abase_x; - const GFC_INTEGER_4 *restrict bbase_y; - GFC_INTEGER_4 *restrict dest_y; - GFC_INTEGER_4 s; + const GFC_UINTEGER_4 *restrict abase_x; + const GFC_UINTEGER_4 *restrict bbase_y; + GFC_UINTEGER_4 *restrict dest_y; + GFC_UINTEGER_4 s; for (y = 0; y < ycount; y++) { @@ -2354,7 +2354,7 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_4) 0; + s = (GFC_UINTEGER_4) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; @@ -2371,16 +2371,16 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray, /* Currently, this is i386 only. Adjust for other architectures. 
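
It is worth noting why these kernels can be reused for UNSIGNED by changing only the element type: none of the control flow depends on signedness, and the accumulating sums simply wrap modulo 2^32 for the 4-byte kind, which is well defined in C (unlike signed overflow) and matches the modular arithmetic of the unsigned extension. A small self-contained check of that wraparound, using uint32_t as a stand-in for GFC_UINTEGER_4:

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  /* Each product is 0x80000000 * 3, which is 0x80000000 mod 2^32;
     three of them sum to 0x180000000, i.e. 0x80000000 mod 2^32.  */
  uint32_t a[3] = { 0x80000000u, 0x80000000u, 0x80000000u };
  uint32_t b[3] = { 3, 3, 3 };
  uint32_t s = 0;
  for (int n = 0; n < 3; n++)
    s += a[n] * b[n];
  assert (s == 0x80000000u);
  return 0;
}
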
*/ -void matmul_i4 (gfc_array_i4 * const restrict retarray, - gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas, +void matmul_i4 (gfc_array_m4 * const restrict retarray, + gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - static void (*matmul_p) (gfc_array_i4 * const restrict retarray, - gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas, + static void (*matmul_p) (gfc_array_m4 * const restrict retarray, + gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas, int blas_limit, blas_call gemm); - void (*matmul_fn) (gfc_array_i4 * const restrict retarray, - gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas, + void (*matmul_fn) (gfc_array_m4 * const restrict retarray, + gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas, int blas_limit, blas_call gemm); matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); @@ -2447,13 +2447,13 @@ void matmul_i4 (gfc_array_i4 * const restrict retarray, #else /* Just the vanilla function. */ void -matmul_i4 (gfc_array_i4 * const restrict retarray, - gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas, +matmul_i4 (gfc_array_m4 * const restrict retarray, + gfc_array_m4 * const restrict a, gfc_array_m4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_4 * restrict abase; - const GFC_INTEGER_4 * restrict bbase; - GFC_INTEGER_4 * restrict dest; + const GFC_UINTEGER_4 * restrict abase; + const GFC_UINTEGER_4 * restrict bbase; + GFC_UINTEGER_4 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -2495,7 +2495,7 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_4)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_4)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -2614,7 +2614,7 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_4 one = 1, zero = 0; + const GFC_UINTEGER_4 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -2652,8 +2652,8 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_4 *a, *b; - GFC_INTEGER_4 *c; + const GFC_UINTEGER_4 *a, *b; + GFC_UINTEGER_4 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -2661,11 +2661,11 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_4 *t1; + GFC_UINTEGER_4 *t1; a = abase; b = bbase; @@ -2685,7 +2685,7 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_4)0; + c[i + j * c_dim1] = (GFC_UINTEGER_4)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -2702,7 +2702,7 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_4)); /* Start turning the crank. */ i1 = n; @@ -2920,10 +2920,10 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_4 *restrict abase_x; - const GFC_INTEGER_4 *restrict bbase_y; - GFC_INTEGER_4 *restrict dest_y; - GFC_INTEGER_4 s; + const GFC_UINTEGER_4 *restrict abase_x; + const GFC_UINTEGER_4 *restrict bbase_y; + GFC_UINTEGER_4 *restrict dest_y; + GFC_UINTEGER_4 s; for (y = 0; y < ycount; y++) { @@ -2932,7 +2932,7 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_4) 0; + s = (GFC_UINTEGER_4) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -2941,13 +2941,13 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, } else { - const GFC_INTEGER_4 *restrict bbase_y; - GFC_INTEGER_4 s; + const GFC_UINTEGER_4 *restrict bbase_y; + GFC_UINTEGER_4 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_4) 0; + s = (GFC_UINTEGER_4) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -2956,13 +2956,13 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_4 *restrict bbase_y; - GFC_INTEGER_4 s; + const GFC_UINTEGER_4 *restrict bbase_y; + GFC_UINTEGER_4 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_4) 0; + s = (GFC_UINTEGER_4) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -2972,7 +2972,7 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_4)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -2984,10 +2984,10 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, } else { - const GFC_INTEGER_4 *restrict abase_x; - const GFC_INTEGER_4 *restrict bbase_y; - GFC_INTEGER_4 *restrict dest_y; - GFC_INTEGER_4 s; + const GFC_UINTEGER_4 *restrict abase_x; + const GFC_UINTEGER_4 *restrict bbase_y; + GFC_UINTEGER_4 *restrict dest_y; + GFC_UINTEGER_4 s; for (y = 0; y < ycount; y++) { @@ -2996,7 +2996,7 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_4) 0; + s = (GFC_UINTEGER_4) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; diff --git a/libgfortran/generated/matmul_i8.c b/libgfortran/generated/matmul_i8.c index 9405abc23b8a..96ef7e694569 100644 --- a/libgfortran/generated/matmul_i8.c +++ b/libgfortran/generated/matmul_i8.c @@ -28,17 +28,17 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #include -#if defined (HAVE_GFC_INTEGER_8) +#if defined (HAVE_GFC_UINTEGER_8) /* Prototype for the BLAS ?gemm subroutine, a pointer to which can be passed to us by the front-end, in which case we call it for large matrices. 
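
Below, as in the smaller kinds, the non-BLAS fallback branches on the operand ranks: a rank-1 a is roughly VECTOR * MATRIX, a rank-1 result is MATRIX * VECTOR, and the general case walks both strides explicitly. The vector-times-matrix branch reduces to one strided dot product per result element; extracted into a standalone helper it would read roughly as follows (the helper is illustrative; names mirror the generated code, and index_type is libgfortran's index type):

/* Sketch of the rank-1 a case: dest(y) is the dot product of the
   vector a with column y of b, honoring the descriptor strides.  */
static void
vec_times_mat_sketch (GFC_UINTEGER_8 *dest, const GFC_UINTEGER_8 *abase,
                      const GFC_UINTEGER_8 *bbase, index_type count,
                      index_type ycount, index_type axstride,
                      index_type bxstride, index_type bystride,
                      index_type rxstride)
{
  for (index_type y = 0; y < ycount; y++)
    {
      const GFC_UINTEGER_8 *bbase_y = &bbase[y * bystride];
      GFC_UINTEGER_8 s = 0;
      for (index_type n = 0; n < count; n++)
        s += abase[n * axstride] * bbase_y[n * bxstride];
      dest[y * rxstride] = s;
    }
}
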
*/ typedef void (*blas_call)(const char *, const char *, const int *, const int *, - const int *, const GFC_INTEGER_8 *, const GFC_INTEGER_8 *, - const int *, const GFC_INTEGER_8 *, const int *, - const GFC_INTEGER_8 *, GFC_INTEGER_8 *, const int *, - int, int); + const int *, const GFC_UINTEGER_8 *, const GFC_UINTEGER_8 *, + const int *, const GFC_UINTEGER_8 *, const int *, + const GFC_UINTEGER_8 *, GFC_UINTEGER_8 *, const int *, + int, int); /* The order of loops is different in the case of plain matrix multiplication C=MATMUL(A,B), and in the frequent special case where @@ -69,8 +69,8 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *, see if there is a way to perform the matrix multiplication by a call to the BLAS gemm function. */ -extern void matmul_i8 (gfc_array_i8 * const restrict retarray, - gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas, +extern void matmul_i8 (gfc_array_m8 * const restrict retarray, + gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas, int blas_limit, blas_call gemm); export_proto(matmul_i8); @@ -80,17 +80,17 @@ export_proto(matmul_i8); #ifdef HAVE_AVX static void -matmul_i8_avx (gfc_array_i8 * const restrict retarray, - gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas, +matmul_i8_avx (gfc_array_m8 * const restrict retarray, + gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx"))); static void -matmul_i8_avx (gfc_array_i8 * const restrict retarray, - gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas, +matmul_i8_avx (gfc_array_m8 * const restrict retarray, + gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_8 * restrict abase; - const GFC_INTEGER_8 * restrict bbase; - GFC_INTEGER_8 * restrict dest; + const GFC_UINTEGER_8 * restrict abase; + const GFC_UINTEGER_8 * restrict bbase; + GFC_UINTEGER_8 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -132,7 +132,7 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_8)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_8)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -251,7 +251,7 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_8 one = 1, zero = 0; + const GFC_UINTEGER_8 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -289,8 +289,8 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. 
*/ - const GFC_INTEGER_8 *a, *b; - GFC_INTEGER_8 *c; + const GFC_UINTEGER_8 *a, *b; + GFC_UINTEGER_8 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -298,11 +298,11 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_8 *t1; + GFC_UINTEGER_8 *t1; a = abase; b = bbase; @@ -322,7 +322,7 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray, /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_8)0; + c[i + j * c_dim1] = (GFC_UINTEGER_8)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -339,7 +339,7 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_8)); /* Start turning the crank. */ i1 = n; @@ -557,10 +557,10 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_8 *restrict abase_x; - const GFC_INTEGER_8 *restrict bbase_y; - GFC_INTEGER_8 *restrict dest_y; - GFC_INTEGER_8 s; + const GFC_UINTEGER_8 *restrict abase_x; + const GFC_UINTEGER_8 *restrict bbase_y; + GFC_UINTEGER_8 *restrict dest_y; + GFC_UINTEGER_8 s; for (y = 0; y < ycount; y++) { @@ -569,7 +569,7 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_8) 0; + s = (GFC_UINTEGER_8) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -578,13 +578,13 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray, } else { - const GFC_INTEGER_8 *restrict bbase_y; - GFC_INTEGER_8 s; + const GFC_UINTEGER_8 *restrict bbase_y; + GFC_UINTEGER_8 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_8) 0; + s = (GFC_UINTEGER_8) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -593,13 +593,13 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_8 *restrict bbase_y; - GFC_INTEGER_8 s; + const GFC_UINTEGER_8 *restrict bbase_y; + GFC_UINTEGER_8 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_8) 0; + s = (GFC_UINTEGER_8) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -609,7 +609,7 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_8)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -621,10 +621,10 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray, } else { - const GFC_INTEGER_8 *restrict abase_x; - const GFC_INTEGER_8 *restrict bbase_y; - GFC_INTEGER_8 *restrict dest_y; - GFC_INTEGER_8 s; + const GFC_UINTEGER_8 *restrict abase_x; + const GFC_UINTEGER_8 *restrict bbase_y; + GFC_UINTEGER_8 *restrict dest_y; + GFC_UINTEGER_8 s; for (y = 0; y < ycount; y++) { @@ -633,7 +633,7 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_8) 0; + s = (GFC_UINTEGER_8) 0; for 
(n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; @@ -649,17 +649,17 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray, #ifdef HAVE_AVX2 static void -matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, - gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas, +matmul_i8_avx2 (gfc_array_m8 * const restrict retarray, + gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma"))); static void -matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, - gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas, +matmul_i8_avx2 (gfc_array_m8 * const restrict retarray, + gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_8 * restrict abase; - const GFC_INTEGER_8 * restrict bbase; - GFC_INTEGER_8 * restrict dest; + const GFC_UINTEGER_8 * restrict abase; + const GFC_UINTEGER_8 * restrict bbase; + GFC_UINTEGER_8 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -701,7 +701,7 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_8)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_8)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -820,7 +820,7 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_8 one = 1, zero = 0; + const GFC_UINTEGER_8 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -858,8 +858,8 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_8 *a, *b; - GFC_INTEGER_8 *c; + const GFC_UINTEGER_8 *a, *b; + GFC_UINTEGER_8 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -867,11 +867,11 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_8 *t1; + GFC_UINTEGER_8 *t1; a = abase; b = bbase; @@ -891,7 +891,7 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_8)0; + c[i + j * c_dim1] = (GFC_UINTEGER_8)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -908,7 +908,7 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_8)); /* Start turning the crank. 
*/ i1 = n; @@ -1126,10 +1126,10 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_8 *restrict abase_x; - const GFC_INTEGER_8 *restrict bbase_y; - GFC_INTEGER_8 *restrict dest_y; - GFC_INTEGER_8 s; + const GFC_UINTEGER_8 *restrict abase_x; + const GFC_UINTEGER_8 *restrict bbase_y; + GFC_UINTEGER_8 *restrict dest_y; + GFC_UINTEGER_8 s; for (y = 0; y < ycount; y++) { @@ -1138,7 +1138,7 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_8) 0; + s = (GFC_UINTEGER_8) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -1147,13 +1147,13 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, } else { - const GFC_INTEGER_8 *restrict bbase_y; - GFC_INTEGER_8 s; + const GFC_UINTEGER_8 *restrict bbase_y; + GFC_UINTEGER_8 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_8) 0; + s = (GFC_UINTEGER_8) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -1162,13 +1162,13 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_8 *restrict bbase_y; - GFC_INTEGER_8 s; + const GFC_UINTEGER_8 *restrict bbase_y; + GFC_UINTEGER_8 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_8) 0; + s = (GFC_UINTEGER_8) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -1178,7 +1178,7 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_8)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -1190,10 +1190,10 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, } else { - const GFC_INTEGER_8 *restrict abase_x; - const GFC_INTEGER_8 *restrict bbase_y; - GFC_INTEGER_8 *restrict dest_y; - GFC_INTEGER_8 s; + const GFC_UINTEGER_8 *restrict abase_x; + const GFC_UINTEGER_8 *restrict bbase_y; + GFC_UINTEGER_8 *restrict dest_y; + GFC_UINTEGER_8 s; for (y = 0; y < ycount; y++) { @@ -1202,7 +1202,7 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_8) 0; + s = (GFC_UINTEGER_8) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; @@ -1218,17 +1218,17 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, #ifdef HAVE_AVX512F static void -matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, - gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas, +matmul_i8_avx512f (gfc_array_m8 * const restrict retarray, + gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx512f"))); static void -matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, - gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas, +matmul_i8_avx512f (gfc_array_m8 * const restrict retarray, + gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_8 * restrict abase; - const GFC_INTEGER_8 * restrict bbase; - GFC_INTEGER_8 * restrict dest; + const GFC_UINTEGER_8 * restrict abase; + const GFC_UINTEGER_8 * restrict bbase; + GFC_UINTEGER_8 
* restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -1270,7 +1270,7 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_8)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_8)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -1389,7 +1389,7 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_8 one = 1, zero = 0; + const GFC_UINTEGER_8 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -1427,8 +1427,8 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_8 *a, *b; - GFC_INTEGER_8 *c; + const GFC_UINTEGER_8 *a, *b; + GFC_UINTEGER_8 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -1436,11 +1436,11 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_8 *t1; + GFC_UINTEGER_8 *t1; a = abase; b = bbase; @@ -1460,7 +1460,7 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_8)0; + c[i + j * c_dim1] = (GFC_UINTEGER_8)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -1477,7 +1477,7 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_8)); /* Start turning the crank. 
*/ i1 = n; @@ -1695,10 +1695,10 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_8 *restrict abase_x; - const GFC_INTEGER_8 *restrict bbase_y; - GFC_INTEGER_8 *restrict dest_y; - GFC_INTEGER_8 s; + const GFC_UINTEGER_8 *restrict abase_x; + const GFC_UINTEGER_8 *restrict bbase_y; + GFC_UINTEGER_8 *restrict dest_y; + GFC_UINTEGER_8 s; for (y = 0; y < ycount; y++) { @@ -1707,7 +1707,7 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_8) 0; + s = (GFC_UINTEGER_8) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -1716,13 +1716,13 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, } else { - const GFC_INTEGER_8 *restrict bbase_y; - GFC_INTEGER_8 s; + const GFC_UINTEGER_8 *restrict bbase_y; + GFC_UINTEGER_8 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_8) 0; + s = (GFC_UINTEGER_8) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -1731,13 +1731,13 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_8 *restrict bbase_y; - GFC_INTEGER_8 s; + const GFC_UINTEGER_8 *restrict bbase_y; + GFC_UINTEGER_8 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_8) 0; + s = (GFC_UINTEGER_8) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -1747,7 +1747,7 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_8)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -1759,10 +1759,10 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, } else { - const GFC_INTEGER_8 *restrict abase_x; - const GFC_INTEGER_8 *restrict bbase_y; - GFC_INTEGER_8 *restrict dest_y; - GFC_INTEGER_8 s; + const GFC_UINTEGER_8 *restrict abase_x; + const GFC_UINTEGER_8 *restrict bbase_y; + GFC_UINTEGER_8 *restrict dest_y; + GFC_UINTEGER_8 s; for (y = 0; y < ycount; y++) { @@ -1771,7 +1771,7 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_8) 0; + s = (GFC_UINTEGER_8) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; @@ -1789,29 +1789,29 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, #if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) void -matmul_i8_avx128_fma3 (gfc_array_i8 * const restrict retarray, - gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas, +matmul_i8_avx128_fma3 (gfc_array_m8 * const restrict retarray, + gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); internal_proto(matmul_i8_avx128_fma3); #endif #if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) void -matmul_i8_avx128_fma4 (gfc_array_i8 * const restrict retarray, - gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas, +matmul_i8_avx128_fma4 (gfc_array_m8 * const restrict retarray, + gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) 
__attribute__((__target__("avx,fma4"))); internal_proto(matmul_i8_avx128_fma4); #endif /* Function to fall back to if there is no special processor-specific version. */ static void -matmul_i8_vanilla (gfc_array_i8 * const restrict retarray, - gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas, +matmul_i8_vanilla (gfc_array_m8 * const restrict retarray, + gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_8 * restrict abase; - const GFC_INTEGER_8 * restrict bbase; - GFC_INTEGER_8 * restrict dest; + const GFC_UINTEGER_8 * restrict abase; + const GFC_UINTEGER_8 * restrict bbase; + GFC_UINTEGER_8 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -1853,7 +1853,7 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_8)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_8)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -1972,7 +1972,7 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_8 one = 1, zero = 0; + const GFC_UINTEGER_8 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -2010,8 +2010,8 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_8 *a, *b; - GFC_INTEGER_8 *c; + const GFC_UINTEGER_8 *a, *b; + GFC_UINTEGER_8 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -2019,11 +2019,11 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_8 *t1; + GFC_UINTEGER_8 *t1; a = abase; b = bbase; @@ -2043,7 +2043,7 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray, /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_8)0; + c[i + j * c_dim1] = (GFC_UINTEGER_8)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -2060,7 +2060,7 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_8)); /* Start turning the crank. 
*/ i1 = n; @@ -2278,10 +2278,10 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_8 *restrict abase_x; - const GFC_INTEGER_8 *restrict bbase_y; - GFC_INTEGER_8 *restrict dest_y; - GFC_INTEGER_8 s; + const GFC_UINTEGER_8 *restrict abase_x; + const GFC_UINTEGER_8 *restrict bbase_y; + GFC_UINTEGER_8 *restrict dest_y; + GFC_UINTEGER_8 s; for (y = 0; y < ycount; y++) { @@ -2290,7 +2290,7 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_8) 0; + s = (GFC_UINTEGER_8) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -2299,13 +2299,13 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray, } else { - const GFC_INTEGER_8 *restrict bbase_y; - GFC_INTEGER_8 s; + const GFC_UINTEGER_8 *restrict bbase_y; + GFC_UINTEGER_8 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_8) 0; + s = (GFC_UINTEGER_8) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -2314,13 +2314,13 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_8 *restrict bbase_y; - GFC_INTEGER_8 s; + const GFC_UINTEGER_8 *restrict bbase_y; + GFC_UINTEGER_8 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_8) 0; + s = (GFC_UINTEGER_8) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -2330,7 +2330,7 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_8)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -2342,10 +2342,10 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray, } else { - const GFC_INTEGER_8 *restrict abase_x; - const GFC_INTEGER_8 *restrict bbase_y; - GFC_INTEGER_8 *restrict dest_y; - GFC_INTEGER_8 s; + const GFC_UINTEGER_8 *restrict abase_x; + const GFC_UINTEGER_8 *restrict bbase_y; + GFC_UINTEGER_8 *restrict dest_y; + GFC_UINTEGER_8 s; for (y = 0; y < ycount; y++) { @@ -2354,7 +2354,7 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_8) 0; + s = (GFC_UINTEGER_8) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; @@ -2371,16 +2371,16 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray, /* Currently, this is i386 only. Adjust for other architectures. 
*/ -void matmul_i8 (gfc_array_i8 * const restrict retarray, - gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas, +void matmul_i8 (gfc_array_m8 * const restrict retarray, + gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - static void (*matmul_p) (gfc_array_i8 * const restrict retarray, - gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas, + static void (*matmul_p) (gfc_array_m8 * const restrict retarray, + gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas, int blas_limit, blas_call gemm); - void (*matmul_fn) (gfc_array_i8 * const restrict retarray, - gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas, + void (*matmul_fn) (gfc_array_m8 * const restrict retarray, + gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas, int blas_limit, blas_call gemm); matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); @@ -2447,13 +2447,13 @@ void matmul_i8 (gfc_array_i8 * const restrict retarray, #else /* Just the vanilla function. */ void -matmul_i8 (gfc_array_i8 * const restrict retarray, - gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas, +matmul_i8 (gfc_array_m8 * const restrict retarray, + gfc_array_m8 * const restrict a, gfc_array_m8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - const GFC_INTEGER_8 * restrict abase; - const GFC_INTEGER_8 * restrict bbase; - GFC_INTEGER_8 * restrict dest; + const GFC_UINTEGER_8 * restrict abase; + const GFC_UINTEGER_8 * restrict bbase; + GFC_UINTEGER_8 * restrict dest; index_type rxstride, rystride, axstride, aystride, bxstride, bystride; index_type x, y, n, count, xcount, ycount; @@ -2495,7 +2495,7 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, } retarray->base_addr - = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_8)); + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_UINTEGER_8)); retarray->offset = 0; } else if (unlikely (compile_options.bounds_check)) @@ -2614,7 +2614,7 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, > POW3(blas_limit))) { const int m = xcount, n = ycount, k = count, ldc = rystride; - const GFC_INTEGER_8 one = 1, zero = 0; + const GFC_UINTEGER_8 one = 1, zero = 0; const int lda = (axstride == 1) ? aystride : axstride, ldb = (bxstride == 1) ? bystride : bxstride; @@ -2652,8 +2652,8 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, from netlib.org, translated to C, and modified for matmul.m4. */ - const GFC_INTEGER_8 *a, *b; - GFC_INTEGER_8 *c; + const GFC_UINTEGER_8 *a, *b; + GFC_UINTEGER_8 *c; const index_type m = xcount, n = ycount, k = count; /* System generated locals */ @@ -2661,11 +2661,11 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42, + GFC_UINTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; - GFC_INTEGER_8 *t1; + GFC_UINTEGER_8 *t1; a = abase; b = bbase; @@ -2685,7 +2685,7 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_8)0; + c[i + j * c_dim1] = (GFC_UINTEGER_8)0; /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) @@ -2702,7 +2702,7 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, if (t1_dim > 65536) t1_dim = 65536; - t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8)); + t1 = malloc (t1_dim * sizeof(GFC_UINTEGER_8)); /* Start turning the crank. */ i1 = n; @@ -2920,10 +2920,10 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, { if (GFC_DESCRIPTOR_RANK (a) != 1) { - const GFC_INTEGER_8 *restrict abase_x; - const GFC_INTEGER_8 *restrict bbase_y; - GFC_INTEGER_8 *restrict dest_y; - GFC_INTEGER_8 s; + const GFC_UINTEGER_8 *restrict abase_x; + const GFC_UINTEGER_8 *restrict bbase_y; + GFC_UINTEGER_8 *restrict dest_y; + GFC_UINTEGER_8 s; for (y = 0; y < ycount; y++) { @@ -2932,7 +2932,7 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_8) 0; + s = (GFC_UINTEGER_8) 0; for (n = 0; n < count; n++) s += abase_x[n] * bbase_y[n]; dest_y[x] = s; @@ -2941,13 +2941,13 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, } else { - const GFC_INTEGER_8 *restrict bbase_y; - GFC_INTEGER_8 s; + const GFC_UINTEGER_8 *restrict bbase_y; + GFC_UINTEGER_8 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_8) 0; + s = (GFC_UINTEGER_8) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n]; dest[y*rystride] = s; @@ -2956,13 +2956,13 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, } else if (GFC_DESCRIPTOR_RANK (a) == 1) { - const GFC_INTEGER_8 *restrict bbase_y; - GFC_INTEGER_8 s; + const GFC_UINTEGER_8 *restrict bbase_y; + GFC_UINTEGER_8 s; for (y = 0; y < ycount; y++) { bbase_y = &bbase[y*bystride]; - s = (GFC_INTEGER_8) 0; + s = (GFC_UINTEGER_8) 0; for (n = 0; n < count; n++) s += abase[n*axstride] * bbase_y[n*bxstride]; dest[y*rxstride] = s; @@ -2972,7 +2972,7 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, { for (y = 0; y < ycount; y++) for (x = 0; x < xcount; x++) - dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0; + dest[x*rxstride + y*rystride] = (GFC_UINTEGER_8)0; for (y = 0; y < ycount; y++) for (n = 0; n < count; n++) @@ -2984,10 +2984,10 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, } else { - const GFC_INTEGER_8 *restrict abase_x; - const GFC_INTEGER_8 *restrict bbase_y; - GFC_INTEGER_8 *restrict dest_y; - GFC_INTEGER_8 s; + const GFC_UINTEGER_8 *restrict abase_x; + const GFC_UINTEGER_8 *restrict bbase_y; + GFC_UINTEGER_8 *restrict dest_y; + GFC_UINTEGER_8 s; for (y = 0; y < ycount; y++) { @@ -2996,7 +2996,7 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, for (x = 0; x < xcount; x++) { abase_x = &abase[x*axstride]; - s = (GFC_INTEGER_8) 0; + s = (GFC_UINTEGER_8) 0; for (n = 0; n < count; n++) s += abase_x[n*aystride] * bbase_y[n*bxstride]; dest_y[x*rxstride] = s; diff --git a/libgfortran/generated/matmul_r10.c b/libgfortran/generated/matmul_r10.c index c3434c2543fa..9d28bf3a1317 100644 --- a/libgfortran/generated/matmul_r10.c +++ b/libgfortran/generated/matmul_r10.c @@ -35,10 +35,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see matrices. 
*/ typedef void (*blas_call)(const char *, const char *, const int *, const int *, - const int *, const GFC_REAL_10 *, const GFC_REAL_10 *, - const int *, const GFC_REAL_10 *, const int *, - const GFC_REAL_10 *, GFC_REAL_10 *, const int *, - int, int); + const int *, const GFC_REAL_10 *, const GFC_REAL_10 *, + const int *, const GFC_REAL_10 *, const int *, + const GFC_REAL_10 *, GFC_REAL_10 *, const int *, + int, int); /* The order of loops is different in the case of plain matrix multiplication C=MATMUL(A,B), and in the frequent special case where @@ -69,7 +69,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *, see if there is a way to perform the matrix multiplication by a call to the BLAS gemm function. */ -extern void matmul_r10 (gfc_array_r10 * const restrict retarray, +extern void matmul_r10 (gfc_array_r10 * const restrict retarray, gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas, int blas_limit, blas_call gemm); export_proto(matmul_r10); @@ -80,11 +80,11 @@ export_proto(matmul_r10); #ifdef HAVE_AVX static void -matmul_r10_avx (gfc_array_r10 * const restrict retarray, +matmul_r10_avx (gfc_array_r10 * const restrict retarray, gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx"))); static void -matmul_r10_avx (gfc_array_r10 * const restrict retarray, +matmul_r10_avx (gfc_array_r10 * const restrict retarray, gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -649,11 +649,11 @@ matmul_r10_avx (gfc_array_r10 * const restrict retarray, #ifdef HAVE_AVX2 static void -matmul_r10_avx2 (gfc_array_r10 * const restrict retarray, +matmul_r10_avx2 (gfc_array_r10 * const restrict retarray, gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma"))); static void -matmul_r10_avx2 (gfc_array_r10 * const restrict retarray, +matmul_r10_avx2 (gfc_array_r10 * const restrict retarray, gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -1218,11 +1218,11 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict retarray, #ifdef HAVE_AVX512F static void -matmul_r10_avx512f (gfc_array_r10 * const restrict retarray, +matmul_r10_avx512f (gfc_array_r10 * const restrict retarray, gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx512f"))); static void -matmul_r10_avx512f (gfc_array_r10 * const restrict retarray, +matmul_r10_avx512f (gfc_array_r10 * const restrict retarray, gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -1789,7 +1789,7 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict retarray, #if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) void -matmul_r10_avx128_fma3 (gfc_array_r10 * const restrict retarray, +matmul_r10_avx128_fma3 (gfc_array_r10 * const restrict retarray, gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); internal_proto(matmul_r10_avx128_fma3); @@ -1797,7 +1797,7 @@ internal_proto(matmul_r10_avx128_fma3); #if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) void -matmul_r10_avx128_fma4 (gfc_array_r10 * const restrict 
retarray, +matmul_r10_avx128_fma4 (gfc_array_r10 * const restrict retarray, gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); internal_proto(matmul_r10_avx128_fma4); @@ -1805,7 +1805,7 @@ internal_proto(matmul_r10_avx128_fma4); /* Function to fall back to if there is no special processor-specific version. */ static void -matmul_r10_vanilla (gfc_array_r10 * const restrict retarray, +matmul_r10_vanilla (gfc_array_r10 * const restrict retarray, gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -2371,15 +2371,15 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict retarray, /* Currently, this is i386 only. Adjust for other architectures. */ -void matmul_r10 (gfc_array_r10 * const restrict retarray, +void matmul_r10 (gfc_array_r10 * const restrict retarray, gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - static void (*matmul_p) (gfc_array_r10 * const restrict retarray, + static void (*matmul_p) (gfc_array_r10 * const restrict retarray, gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas, int blas_limit, blas_call gemm); - void (*matmul_fn) (gfc_array_r10 * const restrict retarray, + void (*matmul_fn) (gfc_array_r10 * const restrict retarray, gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas, int blas_limit, blas_call gemm); @@ -2447,7 +2447,7 @@ void matmul_r10 (gfc_array_r10 * const restrict retarray, #else /* Just the vanilla function. */ void -matmul_r10 (gfc_array_r10 * const restrict retarray, +matmul_r10 (gfc_array_r10 * const restrict retarray, gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { diff --git a/libgfortran/generated/matmul_r16.c b/libgfortran/generated/matmul_r16.c index 2fe50d216677..889280cb4caa 100644 --- a/libgfortran/generated/matmul_r16.c +++ b/libgfortran/generated/matmul_r16.c @@ -35,10 +35,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see matrices. */ typedef void (*blas_call)(const char *, const char *, const int *, const int *, - const int *, const GFC_REAL_16 *, const GFC_REAL_16 *, - const int *, const GFC_REAL_16 *, const int *, - const GFC_REAL_16 *, GFC_REAL_16 *, const int *, - int, int); + const int *, const GFC_REAL_16 *, const GFC_REAL_16 *, + const int *, const GFC_REAL_16 *, const int *, + const GFC_REAL_16 *, GFC_REAL_16 *, const int *, + int, int); /* The order of loops is different in the case of plain matrix multiplication C=MATMUL(A,B), and in the frequent special case where @@ -69,7 +69,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *, see if there is a way to perform the matrix multiplication by a call to the BLAS gemm function. 
*/ -extern void matmul_r16 (gfc_array_r16 * const restrict retarray, +extern void matmul_r16 (gfc_array_r16 * const restrict retarray, gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas, int blas_limit, blas_call gemm); export_proto(matmul_r16); @@ -80,11 +80,11 @@ export_proto(matmul_r16); #ifdef HAVE_AVX static void -matmul_r16_avx (gfc_array_r16 * const restrict retarray, +matmul_r16_avx (gfc_array_r16 * const restrict retarray, gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx"))); static void -matmul_r16_avx (gfc_array_r16 * const restrict retarray, +matmul_r16_avx (gfc_array_r16 * const restrict retarray, gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -649,11 +649,11 @@ matmul_r16_avx (gfc_array_r16 * const restrict retarray, #ifdef HAVE_AVX2 static void -matmul_r16_avx2 (gfc_array_r16 * const restrict retarray, +matmul_r16_avx2 (gfc_array_r16 * const restrict retarray, gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma"))); static void -matmul_r16_avx2 (gfc_array_r16 * const restrict retarray, +matmul_r16_avx2 (gfc_array_r16 * const restrict retarray, gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -1218,11 +1218,11 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict retarray, #ifdef HAVE_AVX512F static void -matmul_r16_avx512f (gfc_array_r16 * const restrict retarray, +matmul_r16_avx512f (gfc_array_r16 * const restrict retarray, gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx512f"))); static void -matmul_r16_avx512f (gfc_array_r16 * const restrict retarray, +matmul_r16_avx512f (gfc_array_r16 * const restrict retarray, gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -1789,7 +1789,7 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict retarray, #if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) void -matmul_r16_avx128_fma3 (gfc_array_r16 * const restrict retarray, +matmul_r16_avx128_fma3 (gfc_array_r16 * const restrict retarray, gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); internal_proto(matmul_r16_avx128_fma3); @@ -1797,7 +1797,7 @@ internal_proto(matmul_r16_avx128_fma3); #if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) void -matmul_r16_avx128_fma4 (gfc_array_r16 * const restrict retarray, +matmul_r16_avx128_fma4 (gfc_array_r16 * const restrict retarray, gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); internal_proto(matmul_r16_avx128_fma4); @@ -1805,7 +1805,7 @@ internal_proto(matmul_r16_avx128_fma4); /* Function to fall back to if there is no special processor-specific version. 
*/ static void -matmul_r16_vanilla (gfc_array_r16 * const restrict retarray, +matmul_r16_vanilla (gfc_array_r16 * const restrict retarray, gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -2371,15 +2371,15 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict retarray, /* Currently, this is i386 only. Adjust for other architectures. */ -void matmul_r16 (gfc_array_r16 * const restrict retarray, +void matmul_r16 (gfc_array_r16 * const restrict retarray, gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - static void (*matmul_p) (gfc_array_r16 * const restrict retarray, + static void (*matmul_p) (gfc_array_r16 * const restrict retarray, gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas, int blas_limit, blas_call gemm); - void (*matmul_fn) (gfc_array_r16 * const restrict retarray, + void (*matmul_fn) (gfc_array_r16 * const restrict retarray, gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas, int blas_limit, blas_call gemm); @@ -2447,7 +2447,7 @@ void matmul_r16 (gfc_array_r16 * const restrict retarray, #else /* Just the vanilla function. */ void -matmul_r16 (gfc_array_r16 * const restrict retarray, +matmul_r16 (gfc_array_r16 * const restrict retarray, gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { diff --git a/libgfortran/generated/matmul_r17.c b/libgfortran/generated/matmul_r17.c index 67ff8e601e22..7ab9f2ff3dc7 100644 --- a/libgfortran/generated/matmul_r17.c +++ b/libgfortran/generated/matmul_r17.c @@ -35,10 +35,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see matrices. */ typedef void (*blas_call)(const char *, const char *, const int *, const int *, - const int *, const GFC_REAL_17 *, const GFC_REAL_17 *, - const int *, const GFC_REAL_17 *, const int *, - const GFC_REAL_17 *, GFC_REAL_17 *, const int *, - int, int); + const int *, const GFC_REAL_17 *, const GFC_REAL_17 *, + const int *, const GFC_REAL_17 *, const int *, + const GFC_REAL_17 *, GFC_REAL_17 *, const int *, + int, int); /* The order of loops is different in the case of plain matrix multiplication C=MATMUL(A,B), and in the frequent special case where @@ -69,7 +69,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *, see if there is a way to perform the matrix multiplication by a call to the BLAS gemm function. 
*/ -extern void matmul_r17 (gfc_array_r17 * const restrict retarray, +extern void matmul_r17 (gfc_array_r17 * const restrict retarray, gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, int blas_limit, blas_call gemm); export_proto(matmul_r17); @@ -80,11 +80,11 @@ export_proto(matmul_r17); #ifdef HAVE_AVX static void -matmul_r17_avx (gfc_array_r17 * const restrict retarray, +matmul_r17_avx (gfc_array_r17 * const restrict retarray, gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx"))); static void -matmul_r17_avx (gfc_array_r17 * const restrict retarray, +matmul_r17_avx (gfc_array_r17 * const restrict retarray, gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -649,11 +649,11 @@ matmul_r17_avx (gfc_array_r17 * const restrict retarray, #ifdef HAVE_AVX2 static void -matmul_r17_avx2 (gfc_array_r17 * const restrict retarray, +matmul_r17_avx2 (gfc_array_r17 * const restrict retarray, gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma"))); static void -matmul_r17_avx2 (gfc_array_r17 * const restrict retarray, +matmul_r17_avx2 (gfc_array_r17 * const restrict retarray, gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -1218,11 +1218,11 @@ matmul_r17_avx2 (gfc_array_r17 * const restrict retarray, #ifdef HAVE_AVX512F static void -matmul_r17_avx512f (gfc_array_r17 * const restrict retarray, +matmul_r17_avx512f (gfc_array_r17 * const restrict retarray, gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx512f"))); static void -matmul_r17_avx512f (gfc_array_r17 * const restrict retarray, +matmul_r17_avx512f (gfc_array_r17 * const restrict retarray, gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -1789,7 +1789,7 @@ matmul_r17_avx512f (gfc_array_r17 * const restrict retarray, #if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) void -matmul_r17_avx128_fma3 (gfc_array_r17 * const restrict retarray, +matmul_r17_avx128_fma3 (gfc_array_r17 * const restrict retarray, gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); internal_proto(matmul_r17_avx128_fma3); @@ -1797,7 +1797,7 @@ internal_proto(matmul_r17_avx128_fma3); #if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) void -matmul_r17_avx128_fma4 (gfc_array_r17 * const restrict retarray, +matmul_r17_avx128_fma4 (gfc_array_r17 * const restrict retarray, gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); internal_proto(matmul_r17_avx128_fma4); @@ -1805,7 +1805,7 @@ internal_proto(matmul_r17_avx128_fma4); /* Function to fall back to if there is no special processor-specific version. 
*/ static void -matmul_r17_vanilla (gfc_array_r17 * const restrict retarray, +matmul_r17_vanilla (gfc_array_r17 * const restrict retarray, gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -2371,15 +2371,15 @@ matmul_r17_vanilla (gfc_array_r17 * const restrict retarray, /* Currently, this is i386 only. Adjust for other architectures. */ -void matmul_r17 (gfc_array_r17 * const restrict retarray, +void matmul_r17 (gfc_array_r17 * const restrict retarray, gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - static void (*matmul_p) (gfc_array_r17 * const restrict retarray, + static void (*matmul_p) (gfc_array_r17 * const restrict retarray, gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, int blas_limit, blas_call gemm); - void (*matmul_fn) (gfc_array_r17 * const restrict retarray, + void (*matmul_fn) (gfc_array_r17 * const restrict retarray, gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, int blas_limit, blas_call gemm); @@ -2447,7 +2447,7 @@ void matmul_r17 (gfc_array_r17 * const restrict retarray, #else /* Just the vanilla function. */ void -matmul_r17 (gfc_array_r17 * const restrict retarray, +matmul_r17 (gfc_array_r17 * const restrict retarray, gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { diff --git a/libgfortran/generated/matmul_r4.c b/libgfortran/generated/matmul_r4.c index f1df57749c25..8117af34edde 100644 --- a/libgfortran/generated/matmul_r4.c +++ b/libgfortran/generated/matmul_r4.c @@ -35,10 +35,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see matrices. */ typedef void (*blas_call)(const char *, const char *, const int *, const int *, - const int *, const GFC_REAL_4 *, const GFC_REAL_4 *, - const int *, const GFC_REAL_4 *, const int *, - const GFC_REAL_4 *, GFC_REAL_4 *, const int *, - int, int); + const int *, const GFC_REAL_4 *, const GFC_REAL_4 *, + const int *, const GFC_REAL_4 *, const int *, + const GFC_REAL_4 *, GFC_REAL_4 *, const int *, + int, int); /* The order of loops is different in the case of plain matrix multiplication C=MATMUL(A,B), and in the frequent special case where @@ -69,7 +69,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *, see if there is a way to perform the matrix multiplication by a call to the BLAS gemm function. 
*/ -extern void matmul_r4 (gfc_array_r4 * const restrict retarray, +extern void matmul_r4 (gfc_array_r4 * const restrict retarray, gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas, int blas_limit, blas_call gemm); export_proto(matmul_r4); @@ -80,11 +80,11 @@ export_proto(matmul_r4); #ifdef HAVE_AVX static void -matmul_r4_avx (gfc_array_r4 * const restrict retarray, +matmul_r4_avx (gfc_array_r4 * const restrict retarray, gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx"))); static void -matmul_r4_avx (gfc_array_r4 * const restrict retarray, +matmul_r4_avx (gfc_array_r4 * const restrict retarray, gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -649,11 +649,11 @@ matmul_r4_avx (gfc_array_r4 * const restrict retarray, #ifdef HAVE_AVX2 static void -matmul_r4_avx2 (gfc_array_r4 * const restrict retarray, +matmul_r4_avx2 (gfc_array_r4 * const restrict retarray, gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma"))); static void -matmul_r4_avx2 (gfc_array_r4 * const restrict retarray, +matmul_r4_avx2 (gfc_array_r4 * const restrict retarray, gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -1218,11 +1218,11 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict retarray, #ifdef HAVE_AVX512F static void -matmul_r4_avx512f (gfc_array_r4 * const restrict retarray, +matmul_r4_avx512f (gfc_array_r4 * const restrict retarray, gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx512f"))); static void -matmul_r4_avx512f (gfc_array_r4 * const restrict retarray, +matmul_r4_avx512f (gfc_array_r4 * const restrict retarray, gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -1789,7 +1789,7 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict retarray, #if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) void -matmul_r4_avx128_fma3 (gfc_array_r4 * const restrict retarray, +matmul_r4_avx128_fma3 (gfc_array_r4 * const restrict retarray, gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); internal_proto(matmul_r4_avx128_fma3); @@ -1797,7 +1797,7 @@ internal_proto(matmul_r4_avx128_fma3); #if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) void -matmul_r4_avx128_fma4 (gfc_array_r4 * const restrict retarray, +matmul_r4_avx128_fma4 (gfc_array_r4 * const restrict retarray, gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); internal_proto(matmul_r4_avx128_fma4); @@ -1805,7 +1805,7 @@ internal_proto(matmul_r4_avx128_fma4); /* Function to fall back to if there is no special processor-specific version. */ static void -matmul_r4_vanilla (gfc_array_r4 * const restrict retarray, +matmul_r4_vanilla (gfc_array_r4 * const restrict retarray, gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -2371,15 +2371,15 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict retarray, /* Currently, this is i386 only. Adjust for other architectures. 
*/ -void matmul_r4 (gfc_array_r4 * const restrict retarray, +void matmul_r4 (gfc_array_r4 * const restrict retarray, gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - static void (*matmul_p) (gfc_array_r4 * const restrict retarray, + static void (*matmul_p) (gfc_array_r4 * const restrict retarray, gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas, int blas_limit, blas_call gemm); - void (*matmul_fn) (gfc_array_r4 * const restrict retarray, + void (*matmul_fn) (gfc_array_r4 * const restrict retarray, gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas, int blas_limit, blas_call gemm); @@ -2447,7 +2447,7 @@ void matmul_r4 (gfc_array_r4 * const restrict retarray, #else /* Just the vanilla function. */ void -matmul_r4 (gfc_array_r4 * const restrict retarray, +matmul_r4 (gfc_array_r4 * const restrict retarray, gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { diff --git a/libgfortran/generated/matmul_r8.c b/libgfortran/generated/matmul_r8.c index ddfe0a72f779..d05dede27b21 100644 --- a/libgfortran/generated/matmul_r8.c +++ b/libgfortran/generated/matmul_r8.c @@ -35,10 +35,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see matrices. */ typedef void (*blas_call)(const char *, const char *, const int *, const int *, - const int *, const GFC_REAL_8 *, const GFC_REAL_8 *, - const int *, const GFC_REAL_8 *, const int *, - const GFC_REAL_8 *, GFC_REAL_8 *, const int *, - int, int); + const int *, const GFC_REAL_8 *, const GFC_REAL_8 *, + const int *, const GFC_REAL_8 *, const int *, + const GFC_REAL_8 *, GFC_REAL_8 *, const int *, + int, int); /* The order of loops is different in the case of plain matrix multiplication C=MATMUL(A,B), and in the frequent special case where @@ -69,7 +69,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *, see if there is a way to perform the matrix multiplication by a call to the BLAS gemm function. 
*/ -extern void matmul_r8 (gfc_array_r8 * const restrict retarray, +extern void matmul_r8 (gfc_array_r8 * const restrict retarray, gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas, int blas_limit, blas_call gemm); export_proto(matmul_r8); @@ -80,11 +80,11 @@ export_proto(matmul_r8); #ifdef HAVE_AVX static void -matmul_r8_avx (gfc_array_r8 * const restrict retarray, +matmul_r8_avx (gfc_array_r8 * const restrict retarray, gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx"))); static void -matmul_r8_avx (gfc_array_r8 * const restrict retarray, +matmul_r8_avx (gfc_array_r8 * const restrict retarray, gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -649,11 +649,11 @@ matmul_r8_avx (gfc_array_r8 * const restrict retarray, #ifdef HAVE_AVX2 static void -matmul_r8_avx2 (gfc_array_r8 * const restrict retarray, +matmul_r8_avx2 (gfc_array_r8 * const restrict retarray, gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma"))); static void -matmul_r8_avx2 (gfc_array_r8 * const restrict retarray, +matmul_r8_avx2 (gfc_array_r8 * const restrict retarray, gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -1218,11 +1218,11 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict retarray, #ifdef HAVE_AVX512F static void -matmul_r8_avx512f (gfc_array_r8 * const restrict retarray, +matmul_r8_avx512f (gfc_array_r8 * const restrict retarray, gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx512f"))); static void -matmul_r8_avx512f (gfc_array_r8 * const restrict retarray, +matmul_r8_avx512f (gfc_array_r8 * const restrict retarray, gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -1789,7 +1789,7 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict retarray, #if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) void -matmul_r8_avx128_fma3 (gfc_array_r8 * const restrict retarray, +matmul_r8_avx128_fma3 (gfc_array_r8 * const restrict retarray, gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); internal_proto(matmul_r8_avx128_fma3); @@ -1797,7 +1797,7 @@ internal_proto(matmul_r8_avx128_fma3); #if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) void -matmul_r8_avx128_fma4 (gfc_array_r8 * const restrict retarray, +matmul_r8_avx128_fma4 (gfc_array_r8 * const restrict retarray, gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); internal_proto(matmul_r8_avx128_fma4); @@ -1805,7 +1805,7 @@ internal_proto(matmul_r8_avx128_fma4); /* Function to fall back to if there is no special processor-specific version. */ static void -matmul_r8_vanilla (gfc_array_r8 * const restrict retarray, +matmul_r8_vanilla (gfc_array_r8 * const restrict retarray, gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { @@ -2371,15 +2371,15 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict retarray, /* Currently, this is i386 only. Adjust for other architectures. 
*/ -void matmul_r8 (gfc_array_r8 * const restrict retarray, +void matmul_r8 (gfc_array_r8 * const restrict retarray, gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - static void (*matmul_p) (gfc_array_r8 * const restrict retarray, + static void (*matmul_p) (gfc_array_r8 * const restrict retarray, gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas, int blas_limit, blas_call gemm); - void (*matmul_fn) (gfc_array_r8 * const restrict retarray, + void (*matmul_fn) (gfc_array_r8 * const restrict retarray, gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas, int blas_limit, blas_call gemm); @@ -2447,7 +2447,7 @@ void matmul_r8 (gfc_array_r8 * const restrict retarray, #else /* Just the vanilla function. */ void -matmul_r8 (gfc_array_r8 * const restrict retarray, +matmul_r8 (gfc_array_r8 * const restrict retarray, gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas, int blas_limit, blas_call gemm) { diff --git a/libgfortran/libgfortran.h b/libgfortran/libgfortran.h index faf57a33358c..aaa9222c43b6 100644 --- a/libgfortran/libgfortran.h +++ b/libgfortran/libgfortran.h @@ -403,6 +403,13 @@ typedef GFC_ARRAY_DESCRIPTOR (index_type) gfc_array_index_type; #ifdef HAVE_GFC_INTEGER_16 typedef GFC_ARRAY_DESCRIPTOR (GFC_INTEGER_16) gfc_array_i16; #endif +typedef GFC_ARRAY_DESCRIPTOR (GFC_UINTEGER_1) gfc_array_m1; +typedef GFC_ARRAY_DESCRIPTOR (GFC_UINTEGER_2) gfc_array_m2; +typedef GFC_ARRAY_DESCRIPTOR (GFC_UINTEGER_4) gfc_array_m4; +typedef GFC_ARRAY_DESCRIPTOR (GFC_UINTEGER_8) gfc_array_m8; +#ifdef HAVE_GFC_UINTEGER_16 +typedef GFC_ARRAY_DESCRIPTOR (GFC_UINTEGER_16) gfc_array_m16; +#endif typedef GFC_ARRAY_DESCRIPTOR (GFC_REAL_4) gfc_array_r4; typedef GFC_ARRAY_DESCRIPTOR (GFC_REAL_8) gfc_array_r8; #ifdef HAVE_GFC_REAL_10 diff --git a/libgfortran/m4/iparm.m4 b/libgfortran/m4/iparm.m4 index b474620424b7..0c4c76c2428c 100644 --- a/libgfortran/m4/iparm.m4 +++ b/libgfortran/m4/iparm.m4 @@ -4,7 +4,7 @@ dnl This file is part of the GNU Fortran 95 Runtime Library (libgfortran) dnl Distributed under the GNU GPL with exception. See COPYING for details. dnl M4 macro file to get type names from filenames define(get_typename2, `GFC_$1_$2')dnl -define(get_typename, `get_typename2(ifelse($1,i,INTEGER,ifelse($1,r,REAL,ifelse($1,l,LOGICAL,ifelse($1,c,COMPLEX,ifelse($1,s,UINTEGER,unknown))))),`$2')')dnl +define(get_typename, `get_typename2(ifelse($1,i,INTEGER,ifelse($1,r,REAL,ifelse($1,l,LOGICAL,ifelse($1,c,COMPLEX,ifelse($1,m,UINTEGER,ifelse($1,s,UINTEGER,unknown)))))),`$2')')dnl define(get_arraytype, `gfc_array_$1$2')dnl define(define_type, `dnl ifelse(regexp($2,`^[0-9]'),-1,`dnl diff --git a/libgfortran/m4/matmul.m4 b/libgfortran/m4/matmul.m4 index 7fc1f5fa75fb..cd804e8be06d 100644 --- a/libgfortran/m4/matmul.m4 +++ b/libgfortran/m4/matmul.m4 @@ -28,6 +28,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #include ' include(iparm.m4)dnl +ifelse(index(rtype_name,`GFC_INTEGER'),`0',dnl +define(`rtype_name',patsubst(rtype_name,`GFC_INTEGER',`GFC_UINTEGER'))dnl +define(`rtype',patsubst(rtype,`gfc_array_i',`gfc_array_m')))dnl `#if defined (HAVE_'rtype_name`) @@ -36,10 +39,10 @@ include(iparm.m4)dnl matrices. 
*/ typedef void (*blas_call)(const char *, const char *, const int *, const int *, - const int *, const 'rtype_name` *, const 'rtype_name` *, - const int *, const 'rtype_name` *, const int *, - const 'rtype_name` *, 'rtype_name` *, const int *, - int, int); + const int *, const 'rtype_name` *, const 'rtype_name` *, + const int *, const 'rtype_name` *, const int *, + const 'rtype_name` *, 'rtype_name` *, const int *, + int, int); /* The order of loops is different in the case of plain matrix multiplication C=MATMUL(A,B), and in the frequent special case where @@ -70,7 +73,7 @@ typedef void (*blas_call)(const char *, const char *, const int *, const int *, see if there is a way to perform the matrix multiplication by a call to the BLAS gemm function. */ -extern void matmul_'rtype_code` ('rtype` * const restrict retarray, +extern void matmul_'rtype_code` ('rtype` * const restrict retarray, 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas, int blas_limit, blas_call gemm); export_proto(matmul_'rtype_code`); @@ -82,7 +85,7 @@ export_proto(matmul_'rtype_code`); #ifdef HAVE_AVX 'define(`matmul_name',`matmul_'rtype_code`_avx')dnl `static void -'matmul_name` ('rtype` * const restrict retarray, +'matmul_name` ('rtype` * const restrict retarray, 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx"))); static' include(matmul_internal.m4)dnl @@ -91,7 +94,7 @@ static' include(matmul_internal.m4)dnl #ifdef HAVE_AVX2 'define(`matmul_name',`matmul_'rtype_code`_avx2')dnl `static void -'matmul_name` ('rtype` * const restrict retarray, +'matmul_name` ('rtype` * const restrict retarray, 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma"))); static' include(matmul_internal.m4)dnl @@ -100,7 +103,7 @@ static' include(matmul_internal.m4)dnl #ifdef HAVE_AVX512F 'define(`matmul_name',`matmul_'rtype_code`_avx512f')dnl `static void -'matmul_name` ('rtype` * const restrict retarray, +'matmul_name` ('rtype` * const restrict retarray, 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx512f"))); static' include(matmul_internal.m4)dnl @@ -111,7 +114,7 @@ static' include(matmul_internal.m4)dnl #if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) 'define(`matmul_name',`matmul_'rtype_code`_avx128_fma3')dnl `void -'matmul_name` ('rtype` * const restrict retarray, +'matmul_name` ('rtype` * const restrict retarray, 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); internal_proto('matmul_name`); @@ -120,7 +123,7 @@ internal_proto('matmul_name`); #if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) 'define(`matmul_name',`matmul_'rtype_code`_avx128_fma4')dnl `void -'matmul_name` ('rtype` * const restrict retarray, +'matmul_name` ('rtype` * const restrict retarray, 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas, int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); internal_proto('matmul_name`); @@ -134,15 +137,15 @@ internal_proto('matmul_name`); /* Currently, this is i386 only. Adjust for other architectures. 
*/ -void matmul_'rtype_code` ('rtype` * const restrict retarray, +void matmul_'rtype_code` ('rtype` * const restrict retarray, 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas, int blas_limit, blas_call gemm) { - static void (*matmul_p) ('rtype` * const restrict retarray, + static void (*matmul_p) ('rtype` * const restrict retarray, 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas, int blas_limit, blas_call gemm); - void (*matmul_fn) ('rtype` * const restrict retarray, + void (*matmul_fn) ('rtype` * const restrict retarray, 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas, int blas_limit, blas_call gemm); diff --git a/libgfortran/m4/matmul_internal.m4 b/libgfortran/m4/matmul_internal.m4 index 0e96207a0fc0..20b1a486a4a8 100644 --- a/libgfortran/m4/matmul_internal.m4 +++ b/libgfortran/m4/matmul_internal.m4 @@ -1,5 +1,5 @@ `void -'matmul_name` ('rtype` * const restrict retarray, +'matmul_name` ('rtype` * const restrict retarray, 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas, int blas_limit, blas_call gemm) { -- 2.47.2
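
Note on the type substitution (not part of the patch itself): the reason matmul.m4 can simply rewrite GFC_INTEGER to GFC_UINTEGER and gfc_array_i to gfc_array_m, and the reason the front end can resolve unsigned MATMUL to the signed-integer library entry points, is that on two's-complement targets a wraparound dot product accumulated in an unsigned type is bit-for-bit identical to the signed one. A minimal standalone sketch of that equivalence, using int64_t/uint64_t as stand-ins for GFC_INTEGER_8/GFC_UINTEGER_8 (the data is chosen so the signed accumulation never overflows, keeping the behaviour fully defined):

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  /* Stand-ins for GFC_INTEGER_8 / GFC_UINTEGER_8.  Accessing the
     signed data through a pointer to the corresponding unsigned type
     is permitted by the C aliasing rules.  */
  const int64_t sa[3] = { -2, 3, 6 };
  const int64_t sb[3] = { 5, -7, 2 };
  const uint64_t *ua = (const uint64_t *) sa;
  const uint64_t *ub = (const uint64_t *) sb;

  int64_t ss = 0;
  uint64_t us = 0;
  for (int n = 0; n < 3; n++)
    {
      ss += sa[n] * sb[n];	/* No signed overflow with this data.  */
      us += ua[n] * ub[n];	/* Wraps modulo 2**64 by definition.  */
    }

  /* Prints 1: the two accumulators agree bit for bit.  */
  printf ("%d\n", (uint64_t) ss == us);
  return 0;
}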
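The matmul_i8 driver shown above keeps the selected kernel in a static function pointer and accesses it with relaxed atomics: a race on first use can only store the same value twice, so no stronger ordering is needed, and every later call pays a single atomic load. A condensed sketch of that dispatch idiom, with the hypothetical names do_work_avx2/do_work_vanilla standing in for the matmul_i8_* variants (the GCC builtins __builtin_cpu_init and __builtin_cpu_supports are the ones the real file uses on x86):

#include <stdio.h>

static void do_work_avx2 (void)    { puts ("avx2 path"); }
static void do_work_vanilla (void) { puts ("vanilla path"); }

void
do_work (void)
{
  static void (*work_p) (void);
  void (*work_fn) (void);

  work_fn = __atomic_load_n (&work_p, __ATOMIC_RELAXED);
  if (work_fn == NULL)
    {
      /* First call: pick the best implementation once.  */
      work_fn = do_work_vanilla;
#if defined(__x86_64__) || defined(__i386__)
      __builtin_cpu_init ();
      if (__builtin_cpu_supports ("avx2"))
	work_fn = do_work_avx2;
#endif
      /* Racing threads may store the same pointer twice; that is
	 harmless, which is why relaxed ordering suffices.  */
      __atomic_store_n (&work_p, work_fn, __ATOMIC_RELAXED);
    }
  work_fn ();
}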
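Finally, all of the non-BLAS, non-blocked branches in the generated kernels reduce to the same stride-aware triple loop; the rank-1 cases and the contiguous fast paths are just specializations of it. A condensed sketch of the general rank-2 case, as a hypothetical helper with uint64_t standing in for GFC_UINTEGER_8 and ptrdiff_t for index_type:

#include <stdint.h>
#include <stddef.h>

/* C(x,y) = sum over n of A(x,n) * B(n,y), with explicit strides so the
   same loop serves non-contiguous array sections.  The accumulator
   wraps modulo 2**64 on overflow, matching unsigned semantics.  */
static void
matmul_um8_sketch (uint64_t *restrict dest,
		   const uint64_t *restrict abase,
		   const uint64_t *restrict bbase,
		   ptrdiff_t xcount, ptrdiff_t ycount, ptrdiff_t count,
		   ptrdiff_t rxstride, ptrdiff_t rystride,
		   ptrdiff_t axstride, ptrdiff_t aystride,
		   ptrdiff_t bxstride, ptrdiff_t bystride)
{
  for (ptrdiff_t y = 0; y < ycount; y++)
    for (ptrdiff_t x = 0; x < xcount; x++)
      {
	uint64_t s = 0;
	for (ptrdiff_t n = 0; n < count; n++)
	  s += abase[x * axstride + n * aystride]
	       * bbase[n * bxstride + y * bystride];
	dest[x * rxstride + y * rystride] = s;
      }
}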