]>
Commit | Line | Data |
---|---|---|
6de9cd9a | 1 | `/* Implementation of the MATMUL intrinsic |
748086b7 | 2 | Copyright 2002, 2005, 2006, 2007, 2009 Free Software Foundation, Inc. |
6de9cd9a DN |
3 | Contributed by Paul Brook <paul@nowt.org> |
4 | ||
57dea9f6 | 5 | This file is part of the GNU Fortran 95 runtime library (libgfortran). |
6de9cd9a DN |
6 | |
7 | Libgfortran is free software; you can redistribute it and/or | |
57dea9f6 | 8 | modify it under the terms of the GNU General Public |
6de9cd9a | 9 | License as published by the Free Software Foundation; either |
748086b7 | 10 | version 3 of the License, or (at your option) any later version. |
6de9cd9a DN |
11 | |
12 | Libgfortran is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
57dea9f6 | 15 | GNU General Public License for more details. |
6de9cd9a | 16 | |
748086b7 JJ |
17 | Under Section 7 of GPL version 3, you are granted additional |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
6de9cd9a | 25 | |
36ae8a61 | 26 | #include "libgfortran.h" |
6de9cd9a | 27 | #include <stdlib.h> |
36ae8a61 FXC |
28 | #include <assert.h>' |
29 | ||
c9e66eda | 30 | include(iparm.m4)dnl |
6de9cd9a | 31 | |
adea5e16 | 32 | `#if defined (HAVE_'rtype_name`) |
644cb69f | 33 | |
6de9cd9a DN |
34 | /* Dimensions: retarray(x,y) a(x, count) b(count,y). |
35 | Either a or b can be rank 1. In this case x or y is 1. */ | |
7d7b8bfe | 36 | |
adea5e16 | 37 | extern void matmul_'rtype_code` ('rtype` * const restrict, |
28dc6b33 | 38 | gfc_array_l1 * const restrict, gfc_array_l1 * const restrict); |
adea5e16 | 39 | export_proto(matmul_'rtype_code`); |
7d7b8bfe | 40 | |
6de9cd9a | 41 | void |
adea5e16 | 42 | matmul_'rtype_code` ('rtype` * const restrict retarray, |
28dc6b33 | 43 | gfc_array_l1 * const restrict a, gfc_array_l1 * const restrict b) |
6de9cd9a | 44 | { |
28dc6b33 TK |
45 | const GFC_LOGICAL_1 * restrict abase; |
46 | const GFC_LOGICAL_1 * restrict bbase; | |
adea5e16 | 47 | 'rtype_name` * restrict dest; |
6de9cd9a DN |
48 | index_type rxstride; |
49 | index_type rystride; | |
50 | index_type xcount; | |
51 | index_type ycount; | |
52 | index_type xstride; | |
53 | index_type ystride; | |
54 | index_type x; | |
55 | index_type y; | |
28dc6b33 TK |
56 | int a_kind; |
57 | int b_kind; | |
6de9cd9a | 58 | |
28dc6b33 TK |
59 | const GFC_LOGICAL_1 * restrict pa; |
60 | const GFC_LOGICAL_1 * restrict pb; | |
6de9cd9a DN |
61 | index_type astride; |
62 | index_type bstride; | |
63 | index_type count; | |
64 | index_type n; | |
65 | ||
66 | assert (GFC_DESCRIPTOR_RANK (a) == 2 | |
67 | || GFC_DESCRIPTOR_RANK (b) == 2); | |
883c9d4d VL |
68 | |
69 | if (retarray->data == NULL) | |
70 | { | |
71 | if (GFC_DESCRIPTOR_RANK (a) == 1) | |
72 | { | |
dfb55fdc TK |
73 | GFC_DIMENSION_SET(retarray->dim[0], 0, |
74 | GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); | |
883c9d4d VL |
75 | } |
76 | else if (GFC_DESCRIPTOR_RANK (b) == 1) | |
77 | { | |
dfb55fdc TK |
78 | GFC_DIMENSION_SET(retarray->dim[0], 0, |
79 | GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); | |
883c9d4d VL |
80 | } |
81 | else | |
82 | { | |
dfb55fdc TK |
83 | GFC_DIMENSION_SET(retarray->dim[0], 0, |
84 | GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); | |
85 | ||
86 | GFC_DIMENSION_SET(retarray->dim[1], 0, | |
87 | GFC_DESCRIPTOR_EXTENT(b,1) - 1, | |
88 | GFC_DESCRIPTOR_EXTENT(retarray,0)); | |
883c9d4d VL |
89 | } |
90 | ||
07d3cebe | 91 | retarray->data |
adea5e16 | 92 | = internal_malloc_size (sizeof ('rtype_name`) * size0 ((array_t *) retarray)); |
efd4dc1a | 93 | retarray->offset = 0; |
883c9d4d | 94 | } |
9731c4a3 | 95 | else if (unlikely (compile_options.bounds_check)) |
9ad13e91 TK |
96 | { |
97 | index_type ret_extent, arg_extent; | |
98 | ||
99 | if (GFC_DESCRIPTOR_RANK (a) == 1) | |
100 | { | |
dfb55fdc TK |
101 | arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); |
102 | ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); | |
9ad13e91 TK |
103 | if (arg_extent != ret_extent) |
104 | runtime_error ("Incorrect extent in return array in" | |
105 | " MATMUL intrinsic: is %ld, should be %ld", | |
106 | (long int) ret_extent, (long int) arg_extent); | |
107 | } | |
108 | else if (GFC_DESCRIPTOR_RANK (b) == 1) | |
109 | { | |
dfb55fdc TK |
110 | arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); |
111 | ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); | |
9ad13e91 TK |
112 | if (arg_extent != ret_extent) |
113 | runtime_error ("Incorrect extent in return array in" | |
114 | " MATMUL intrinsic: is %ld, should be %ld", | |
115 | (long int) ret_extent, (long int) arg_extent); | |
116 | } | |
117 | else | |
118 | { | |
dfb55fdc TK |
119 | arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); |
120 | ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); | |
9ad13e91 TK |
121 | if (arg_extent != ret_extent) |
122 | runtime_error ("Incorrect extent in return array in" | |
123 | " MATMUL intrinsic for dimension 1:" | |
124 | " is %ld, should be %ld", | |
125 | (long int) ret_extent, (long int) arg_extent); | |
126 | ||
dfb55fdc TK |
127 | arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); |
128 | ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); | |
9ad13e91 TK |
129 | if (arg_extent != ret_extent) |
130 | runtime_error ("Incorrect extent in return array in" | |
131 | " MATMUL intrinsic for dimension 2:" | |
132 | " is %ld, should be %ld", | |
133 | (long int) ret_extent, (long int) arg_extent); | |
134 | } | |
135 | } | |
883c9d4d | 136 | |
6de9cd9a | 137 | abase = a->data; |
28dc6b33 TK |
138 | a_kind = GFC_DESCRIPTOR_SIZE (a); |
139 | ||
140 | if (a_kind == 1 || a_kind == 2 || a_kind == 4 || a_kind == 8 | |
141 | #ifdef HAVE_GFC_LOGICAL_16 | |
142 | || a_kind == 16 | |
143 | #endif | |
144 | ) | |
145 | abase = GFOR_POINTER_TO_L1 (abase, a_kind); | |
146 | else | |
147 | internal_error (NULL, "Funny sized logical array"); | |
148 | ||
6de9cd9a | 149 | bbase = b->data; |
28dc6b33 TK |
150 | b_kind = GFC_DESCRIPTOR_SIZE (b); |
151 | ||
152 | if (b_kind == 1 || b_kind == 2 || b_kind == 4 || b_kind == 8 | |
153 | #ifdef HAVE_GFC_LOGICAL_16 | |
154 | || b_kind == 16 | |
155 | #endif | |
156 | ) | |
157 | bbase = GFOR_POINTER_TO_L1 (bbase, b_kind); | |
158 | else | |
159 | internal_error (NULL, "Funny sized logical array"); | |
160 | ||
6de9cd9a | 161 | dest = retarray->data; |
adea5e16 | 162 | ' |
6de9cd9a | 163 | sinclude(`matmul_asm_'rtype_code`.m4')dnl |
adea5e16 | 164 | ` |
6de9cd9a DN |
165 | if (GFC_DESCRIPTOR_RANK (retarray) == 1) |
166 | { | |
dfb55fdc | 167 | rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); |
6de9cd9a DN |
168 | rystride = rxstride; |
169 | } | |
170 | else | |
171 | { | |
dfb55fdc TK |
172 | rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); |
173 | rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); | |
6de9cd9a DN |
174 | } |
175 | ||
176 | /* If we have rank 1 parameters, zero the absent stride, and set the size to | |
177 | one. */ | |
178 | if (GFC_DESCRIPTOR_RANK (a) == 1) | |
179 | { | |
dfb55fdc TK |
180 | astride = GFC_DESCRIPTOR_STRIDE_BYTES(a,0); |
181 | count = GFC_DESCRIPTOR_EXTENT(a,0); | |
6de9cd9a DN |
182 | xstride = 0; |
183 | rxstride = 0; | |
184 | xcount = 1; | |
185 | } | |
186 | else | |
187 | { | |
dfb55fdc TK |
188 | astride = GFC_DESCRIPTOR_STRIDE_BYTES(a,1); |
189 | count = GFC_DESCRIPTOR_EXTENT(a,1); | |
190 | xstride = GFC_DESCRIPTOR_STRIDE_BYTES(a,0); | |
191 | xcount = GFC_DESCRIPTOR_EXTENT(a,0); | |
6de9cd9a DN |
192 | } |
193 | if (GFC_DESCRIPTOR_RANK (b) == 1) | |
194 | { | |
dfb55fdc TK |
195 | bstride = GFC_DESCRIPTOR_STRIDE_BYTES(b,0); |
196 | assert(count == GFC_DESCRIPTOR_EXTENT(b,0)); | |
6de9cd9a DN |
197 | ystride = 0; |
198 | rystride = 0; | |
199 | ycount = 1; | |
200 | } | |
201 | else | |
202 | { | |
dfb55fdc TK |
203 | bstride = GFC_DESCRIPTOR_STRIDE_BYTES(b,0); |
204 | assert(count == GFC_DESCRIPTOR_EXTENT(b,0)); | |
205 | ystride = GFC_DESCRIPTOR_STRIDE_BYTES(b,1); | |
206 | ycount = GFC_DESCRIPTOR_EXTENT(b,1); | |
6de9cd9a DN |
207 | } |
208 | ||
209 | for (y = 0; y < ycount; y++) | |
210 | { | |
211 | for (x = 0; x < xcount; x++) | |
212 | { | |
213 | /* Do the summation for this element. For real and integer types | |
214 | this is the same as DOT_PRODUCT. For complex types we use do | |
215 | a*b, not conjg(a)*b. */ | |
216 | pa = abase; | |
217 | pb = bbase; | |
218 | *dest = 0; | |
219 | ||
220 | for (n = 0; n < count; n++) | |
221 | { | |
222 | if (*pa && *pb) | |
223 | { | |
224 | *dest = 1; | |
225 | break; | |
226 | } | |
227 | pa += astride; | |
228 | pb += bstride; | |
229 | } | |
230 | ||
231 | dest += rxstride; | |
232 | abase += xstride; | |
233 | } | |
234 | abase -= xstride * xcount; | |
235 | bbase += ystride; | |
236 | dest += rystride - (rxstride * xcount); | |
237 | } | |
238 | } | |
644cb69f FXC |
239 | |
240 | #endif | |
28dc6b33 | 241 | ' |