]>
Commit | Line | Data |
---|---|---|
6de9cd9a DN |
1 | /* Implementation of the MATMUL intrinsic |
2 | Copyright 2002 Free Software Foundation, Inc. | |
3 | Contributed by Paul Brook <paul@nowt.org> | |
4 | ||
5 | This file is part of the GNU Fortran 95 runtime library (libgfor). | |
6 | ||
7 | Libgfortran is free software; you can redistribute it and/or | |
8 | modify it under the terms of the GNU Lesser General Public | |
9 | License as published by the Free Software Foundation; either | |
10 | version 2.1 of the License, or (at your option) any later version. | |
11 | ||
12 | Libgfortran is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU Lesser General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU Lesser General Public | |
18 | License along with libgfor; see the file COPYING.LIB. If not, | |
19 | write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
20 | Boston, MA 02111-1307, USA. */ | |
21 | ||
22 | #include "config.h" | |
23 | #include <stdlib.h> | |
24 | #include <assert.h> | |
25 | #include "libgfortran.h" | |
26 | ||
27 | /* Dimensions: retarray(x,y) a(x, count) b(count,y). | |
28 | Either a or b can be rank 1. In this case x or y is 1. */ | |
29 | void | |
30 | __matmul_i8 (gfc_array_i8 * retarray, gfc_array_i8 * a, gfc_array_i8 * b) | |
31 | { | |
32 | GFC_INTEGER_8 *abase; | |
33 | GFC_INTEGER_8 *bbase; | |
34 | GFC_INTEGER_8 *dest; | |
35 | GFC_INTEGER_8 res; | |
36 | index_type rxstride; | |
37 | index_type rystride; | |
38 | index_type xcount; | |
39 | index_type ycount; | |
40 | index_type xstride; | |
41 | index_type ystride; | |
42 | index_type x; | |
43 | index_type y; | |
44 | ||
45 | GFC_INTEGER_8 *pa; | |
46 | GFC_INTEGER_8 *pb; | |
47 | index_type astride; | |
48 | index_type bstride; | |
49 | index_type count; | |
50 | index_type n; | |
51 | ||
52 | assert (GFC_DESCRIPTOR_RANK (a) == 2 | |
53 | || GFC_DESCRIPTOR_RANK (b) == 2); | |
54 | abase = a->data; | |
55 | bbase = b->data; | |
56 | dest = retarray->data; | |
57 | ||
58 | if (retarray->dim[0].stride == 0) | |
59 | retarray->dim[0].stride = 1; | |
60 | if (a->dim[0].stride == 0) | |
61 | a->dim[0].stride = 1; | |
62 | if (b->dim[0].stride == 0) | |
63 | b->dim[0].stride = 1; | |
64 | ||
65 | ||
66 | if (GFC_DESCRIPTOR_RANK (retarray) == 1) | |
67 | { | |
68 | rxstride = retarray->dim[0].stride; | |
69 | rystride = rxstride; | |
70 | } | |
71 | else | |
72 | { | |
73 | rxstride = retarray->dim[0].stride; | |
74 | rystride = retarray->dim[1].stride; | |
75 | } | |
76 | ||
77 | /* If we have rank 1 parameters, zero the absent stride, and set the size to | |
78 | one. */ | |
79 | if (GFC_DESCRIPTOR_RANK (a) == 1) | |
80 | { | |
81 | astride = a->dim[0].stride; | |
82 | count = a->dim[0].ubound + 1 - a->dim[0].lbound; | |
83 | xstride = 0; | |
84 | rxstride = 0; | |
85 | xcount = 1; | |
86 | } | |
87 | else | |
88 | { | |
89 | astride = a->dim[1].stride; | |
90 | count = a->dim[1].ubound + 1 - a->dim[1].lbound; | |
91 | xstride = a->dim[0].stride; | |
92 | xcount = a->dim[0].ubound + 1 - a->dim[0].lbound; | |
93 | } | |
94 | if (GFC_DESCRIPTOR_RANK (b) == 1) | |
95 | { | |
96 | bstride = b->dim[0].stride; | |
97 | assert(count == b->dim[0].ubound + 1 - b->dim[0].lbound); | |
98 | ystride = 0; | |
99 | rystride = 0; | |
100 | ycount = 1; | |
101 | } | |
102 | else | |
103 | { | |
104 | bstride = b->dim[0].stride; | |
105 | assert(count == b->dim[0].ubound + 1 - b->dim[0].lbound); | |
106 | ystride = b->dim[1].stride; | |
107 | ycount = b->dim[1].ubound + 1 - b->dim[1].lbound; | |
108 | } | |
109 | ||
110 | for (y = 0; y < ycount; y++) | |
111 | { | |
112 | for (x = 0; x < xcount; x++) | |
113 | { | |
114 | /* Do the summation for this element. For real and integer types | |
115 | this is the same as DOT_PRODUCT. For complex types we use do | |
116 | a*b, not conjg(a)*b. */ | |
117 | pa = abase; | |
118 | pb = bbase; | |
119 | res = 0; | |
120 | ||
121 | for (n = 0; n < count; n++) | |
122 | { | |
123 | res += *pa * *pb; | |
124 | pa += astride; | |
125 | pb += bstride; | |
126 | } | |
127 | ||
128 | *dest = res; | |
129 | ||
130 | dest += rxstride; | |
131 | abase += xstride; | |
132 | } | |
133 | abase -= xstride * xcount; | |
134 | bbase += ystride; | |
135 | dest += rystride - (rxstride * xcount); | |
136 | } | |
137 | } | |
138 |