]>
Commit | Line | Data |
---|---|---|
644cb69f | 1 | /* Implementation of the MATMUL intrinsic |
a5544970 | 2 | Copyright (C) 2002-2019 Free Software Foundation, Inc. |
644cb69f FXC |
3 | Contributed by Paul Brook <paul@nowt.org> |
4 | ||
21d1335b | 5 | This file is part of the GNU Fortran runtime library (libgfortran). |
644cb69f FXC |
6 | |
7 | Libgfortran is free software; you can redistribute it and/or | |
8 | modify it under the terms of the GNU General Public | |
9 | License as published by the Free Software Foundation; either | |
748086b7 | 10 | version 3 of the License, or (at your option) any later version. |
644cb69f FXC |
11 | |
12 | Libgfortran is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
748086b7 JJ |
17 | Under Section 7 of GPL version 3, you are granted additional |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
644cb69f | 25 | |
36ae8a61 | 26 | #include "libgfortran.h" |
644cb69f | 27 | #include <assert.h> |
36ae8a61 | 28 | |
644cb69f FXC |
29 | |
30 | #if defined (HAVE_GFC_LOGICAL_16) | |
31 | ||
32 | /* Dimensions: retarray(x,y) a(x, count) b(count,y). | |
33 | Either a or b can be rank 1. In this case x or y is 1. */ | |
34 | ||
85206901 | 35 | extern void matmul_l16 (gfc_array_l16 * const restrict, |
28dc6b33 | 36 | gfc_array_l1 * const restrict, gfc_array_l1 * const restrict); |
644cb69f FXC |
37 | export_proto(matmul_l16); |
38 | ||
39 | void | |
85206901 | 40 | matmul_l16 (gfc_array_l16 * const restrict retarray, |
28dc6b33 | 41 | gfc_array_l1 * const restrict a, gfc_array_l1 * const restrict b) |
644cb69f | 42 | { |
28dc6b33 TK |
43 | const GFC_LOGICAL_1 * restrict abase; |
44 | const GFC_LOGICAL_1 * restrict bbase; | |
85206901 | 45 | GFC_LOGICAL_16 * restrict dest; |
644cb69f FXC |
46 | index_type rxstride; |
47 | index_type rystride; | |
48 | index_type xcount; | |
49 | index_type ycount; | |
50 | index_type xstride; | |
51 | index_type ystride; | |
52 | index_type x; | |
53 | index_type y; | |
28dc6b33 TK |
54 | int a_kind; |
55 | int b_kind; | |
644cb69f | 56 | |
28dc6b33 TK |
57 | const GFC_LOGICAL_1 * restrict pa; |
58 | const GFC_LOGICAL_1 * restrict pb; | |
644cb69f FXC |
59 | index_type astride; |
60 | index_type bstride; | |
61 | index_type count; | |
62 | index_type n; | |
63 | ||
64 | assert (GFC_DESCRIPTOR_RANK (a) == 2 | |
65 | || GFC_DESCRIPTOR_RANK (b) == 2); | |
66 | ||
21d1335b | 67 | if (retarray->base_addr == NULL) |
644cb69f FXC |
68 | { |
69 | if (GFC_DESCRIPTOR_RANK (a) == 1) | |
70 | { | |
dfb55fdc TK |
71 | GFC_DIMENSION_SET(retarray->dim[0], 0, |
72 | GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); | |
644cb69f FXC |
73 | } |
74 | else if (GFC_DESCRIPTOR_RANK (b) == 1) | |
75 | { | |
dfb55fdc TK |
76 | GFC_DIMENSION_SET(retarray->dim[0], 0, |
77 | GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); | |
644cb69f FXC |
78 | } |
79 | else | |
80 | { | |
dfb55fdc TK |
81 | GFC_DIMENSION_SET(retarray->dim[0], 0, |
82 | GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); | |
83 | ||
84 | GFC_DIMENSION_SET(retarray->dim[1], 0, | |
85 | GFC_DESCRIPTOR_EXTENT(b,1) - 1, | |
86 | GFC_DESCRIPTOR_EXTENT(retarray,0)); | |
644cb69f FXC |
87 | } |
88 | ||
21d1335b | 89 | retarray->base_addr |
92e6f3a4 | 90 | = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_LOGICAL_16)); |
644cb69f FXC |
91 | retarray->offset = 0; |
92 | } | |
9731c4a3 | 93 | else if (unlikely (compile_options.bounds_check)) |
9ad13e91 TK |
94 | { |
95 | index_type ret_extent, arg_extent; | |
96 | ||
97 | if (GFC_DESCRIPTOR_RANK (a) == 1) | |
98 | { | |
dfb55fdc TK |
99 | arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); |
100 | ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); | |
9ad13e91 TK |
101 | if (arg_extent != ret_extent) |
102 | runtime_error ("Incorrect extent in return array in" | |
103 | " MATMUL intrinsic: is %ld, should be %ld", | |
104 | (long int) ret_extent, (long int) arg_extent); | |
105 | } | |
106 | else if (GFC_DESCRIPTOR_RANK (b) == 1) | |
107 | { | |
dfb55fdc TK |
108 | arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); |
109 | ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); | |
9ad13e91 TK |
110 | if (arg_extent != ret_extent) |
111 | runtime_error ("Incorrect extent in return array in" | |
112 | " MATMUL intrinsic: is %ld, should be %ld", | |
113 | (long int) ret_extent, (long int) arg_extent); | |
114 | } | |
115 | else | |
116 | { | |
dfb55fdc TK |
117 | arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); |
118 | ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); | |
9ad13e91 TK |
119 | if (arg_extent != ret_extent) |
120 | runtime_error ("Incorrect extent in return array in" | |
121 | " MATMUL intrinsic for dimension 1:" | |
122 | " is %ld, should be %ld", | |
123 | (long int) ret_extent, (long int) arg_extent); | |
124 | ||
dfb55fdc TK |
125 | arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); |
126 | ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); | |
9ad13e91 TK |
127 | if (arg_extent != ret_extent) |
128 | runtime_error ("Incorrect extent in return array in" | |
129 | " MATMUL intrinsic for dimension 2:" | |
130 | " is %ld, should be %ld", | |
131 | (long int) ret_extent, (long int) arg_extent); | |
132 | } | |
133 | } | |
644cb69f | 134 | |
21d1335b | 135 | abase = a->base_addr; |
28dc6b33 TK |
136 | a_kind = GFC_DESCRIPTOR_SIZE (a); |
137 | ||
138 | if (a_kind == 1 || a_kind == 2 || a_kind == 4 || a_kind == 8 | |
139 | #ifdef HAVE_GFC_LOGICAL_16 | |
140 | || a_kind == 16 | |
141 | #endif | |
142 | ) | |
143 | abase = GFOR_POINTER_TO_L1 (abase, a_kind); | |
144 | else | |
145 | internal_error (NULL, "Funny sized logical array"); | |
146 | ||
21d1335b | 147 | bbase = b->base_addr; |
28dc6b33 TK |
148 | b_kind = GFC_DESCRIPTOR_SIZE (b); |
149 | ||
150 | if (b_kind == 1 || b_kind == 2 || b_kind == 4 || b_kind == 8 | |
151 | #ifdef HAVE_GFC_LOGICAL_16 | |
152 | || b_kind == 16 | |
153 | #endif | |
154 | ) | |
155 | bbase = GFOR_POINTER_TO_L1 (bbase, b_kind); | |
156 | else | |
157 | internal_error (NULL, "Funny sized logical array"); | |
158 | ||
21d1335b | 159 | dest = retarray->base_addr; |
644cb69f | 160 | |
644cb69f FXC |
161 | |
162 | if (GFC_DESCRIPTOR_RANK (retarray) == 1) | |
163 | { | |
dfb55fdc | 164 | rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); |
644cb69f FXC |
165 | rystride = rxstride; |
166 | } | |
167 | else | |
168 | { | |
dfb55fdc TK |
169 | rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); |
170 | rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); | |
644cb69f FXC |
171 | } |
172 | ||
173 | /* If we have rank 1 parameters, zero the absent stride, and set the size to | |
174 | one. */ | |
175 | if (GFC_DESCRIPTOR_RANK (a) == 1) | |
176 | { | |
dfb55fdc TK |
177 | astride = GFC_DESCRIPTOR_STRIDE_BYTES(a,0); |
178 | count = GFC_DESCRIPTOR_EXTENT(a,0); | |
644cb69f FXC |
179 | xstride = 0; |
180 | rxstride = 0; | |
181 | xcount = 1; | |
182 | } | |
183 | else | |
184 | { | |
dfb55fdc TK |
185 | astride = GFC_DESCRIPTOR_STRIDE_BYTES(a,1); |
186 | count = GFC_DESCRIPTOR_EXTENT(a,1); | |
187 | xstride = GFC_DESCRIPTOR_STRIDE_BYTES(a,0); | |
188 | xcount = GFC_DESCRIPTOR_EXTENT(a,0); | |
644cb69f FXC |
189 | } |
190 | if (GFC_DESCRIPTOR_RANK (b) == 1) | |
191 | { | |
dfb55fdc TK |
192 | bstride = GFC_DESCRIPTOR_STRIDE_BYTES(b,0); |
193 | assert(count == GFC_DESCRIPTOR_EXTENT(b,0)); | |
644cb69f FXC |
194 | ystride = 0; |
195 | rystride = 0; | |
196 | ycount = 1; | |
197 | } | |
198 | else | |
199 | { | |
dfb55fdc TK |
200 | bstride = GFC_DESCRIPTOR_STRIDE_BYTES(b,0); |
201 | assert(count == GFC_DESCRIPTOR_EXTENT(b,0)); | |
202 | ystride = GFC_DESCRIPTOR_STRIDE_BYTES(b,1); | |
203 | ycount = GFC_DESCRIPTOR_EXTENT(b,1); | |
644cb69f FXC |
204 | } |
205 | ||
206 | for (y = 0; y < ycount; y++) | |
207 | { | |
208 | for (x = 0; x < xcount; x++) | |
209 | { | |
210 | /* Do the summation for this element. For real and integer types | |
211 | this is the same as DOT_PRODUCT. For complex types we use do | |
212 | a*b, not conjg(a)*b. */ | |
213 | pa = abase; | |
214 | pb = bbase; | |
215 | *dest = 0; | |
216 | ||
217 | for (n = 0; n < count; n++) | |
218 | { | |
219 | if (*pa && *pb) | |
220 | { | |
221 | *dest = 1; | |
222 | break; | |
223 | } | |
224 | pa += astride; | |
225 | pb += bstride; | |
226 | } | |
227 | ||
228 | dest += rxstride; | |
229 | abase += xstride; | |
230 | } | |
231 | abase -= xstride * xcount; | |
232 | bbase += ystride; | |
233 | dest += rystride - (rxstride * xcount); | |
234 | } | |
235 | } | |
236 | ||
237 | #endif | |
28dc6b33 | 238 |