]>
Commit | Line | Data |
---|---|---|
c2b00cdc | 1 | /* Helper function for cshift functions. |
a945c346 | 2 | Copyright (C) 2008-2024 Free Software Foundation, Inc. |
c2b00cdc TK |
3 | Contributed by Thomas Koenig <tkoenig@gcc.gnu.org> |
4 | ||
21d1335b | 5 | This file is part of the GNU Fortran runtime library (libgfortran). |
c2b00cdc TK |
6 | |
7 | Libgfortran is free software; you can redistribute it and/or | |
8 | modify it under the terms of the GNU General Public | |
9 | License as published by the Free Software Foundation; either | |
748086b7 | 10 | version 3 of the License, or (at your option) any later version. |
c2b00cdc TK |
11 | |
12 | Libgfortran is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
748086b7 JJ |
17 | Under Section 7 of GPL version 3, you are granted additional |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
c2b00cdc TK |
25 | |
26 | #include "libgfortran.h" | |
c2b00cdc TK |
27 | #include <string.h> |
28 | ||
29 | ||
30 | #if defined (HAVE_GFC_REAL_4) | |
31 | ||
32 | void | |
44720bef | 33 | cshift0_r4 (gfc_array_r4 *ret, const gfc_array_r4 *array, ptrdiff_t shift, |
c2b00cdc TK |
34 | int which) |
35 | { | |
36 | /* r.* indicates the return array. */ | |
37 | index_type rstride[GFC_MAX_DIMENSIONS]; | |
38 | index_type rstride0; | |
39 | index_type roffset; | |
40 | GFC_REAL_4 *rptr; | |
41 | ||
42 | /* s.* indicates the source array. */ | |
43 | index_type sstride[GFC_MAX_DIMENSIONS]; | |
44 | index_type sstride0; | |
45 | index_type soffset; | |
46 | const GFC_REAL_4 *sptr; | |
47 | ||
48 | index_type count[GFC_MAX_DIMENSIONS]; | |
49 | index_type extent[GFC_MAX_DIMENSIONS]; | |
50 | index_type dim; | |
51 | index_type len; | |
52 | index_type n; | |
53 | ||
5dace4bf TK |
54 | bool do_blocked; |
55 | index_type r_ex, a_ex; | |
56 | ||
c2b00cdc TK |
57 | which = which - 1; |
58 | sstride[0] = 0; | |
59 | rstride[0] = 0; | |
60 | ||
61 | extent[0] = 1; | |
62 | count[0] = 0; | |
63 | n = 0; | |
64 | /* Initialized for avoiding compiler warnings. */ | |
65 | roffset = 1; | |
66 | soffset = 1; | |
67 | len = 0; | |
68 | ||
5dace4bf TK |
69 | r_ex = 1; |
70 | a_ex = 1; | |
71 | ||
72 | if (which > 0) | |
c2b00cdc | 73 | { |
5dace4bf TK |
74 | /* Test if both ret and array are contiguous. */ |
75 | do_blocked = true; | |
76 | dim = GFC_DESCRIPTOR_RANK (array); | |
77 | for (n = 0; n < dim; n ++) | |
78 | { | |
79 | index_type rs, as; | |
80 | rs = GFC_DESCRIPTOR_STRIDE (ret, n); | |
81 | if (rs != r_ex) | |
82 | { | |
83 | do_blocked = false; | |
84 | break; | |
85 | } | |
86 | as = GFC_DESCRIPTOR_STRIDE (array, n); | |
87 | if (as != a_ex) | |
88 | { | |
89 | do_blocked = false; | |
90 | break; | |
91 | } | |
92 | r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); | |
93 | a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); | |
94 | } | |
95 | } | |
96 | else | |
97 | do_blocked = false; | |
98 | ||
99 | n = 0; | |
100 | ||
101 | if (do_blocked) | |
102 | { | |
103 | /* For contiguous arrays, use the relationship that | |
104 | ||
105 | dimension(n1,n2,n3) :: a, b | |
106 | b = cshift(a,sh,3) | |
107 | ||
108 | can be dealt with as if | |
109 | ||
110 | dimension(n1*n2*n3) :: an, bn | |
111 | bn = cshift(a,sh*n1*n2,1) | |
112 | ||
113 | we can used a more blocked algorithm for dim>1. */ | |
114 | sstride[0] = 1; | |
115 | rstride[0] = 1; | |
116 | roffset = 1; | |
117 | soffset = 1; | |
118 | len = GFC_DESCRIPTOR_STRIDE(array, which) | |
119 | * GFC_DESCRIPTOR_EXTENT(array, which); | |
120 | shift *= GFC_DESCRIPTOR_STRIDE(array, which); | |
121 | for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) | |
122 | { | |
123 | count[n] = 0; | |
124 | extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); | |
125 | rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); | |
126 | sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); | |
127 | n++; | |
128 | } | |
129 | dim = GFC_DESCRIPTOR_RANK (array) - which; | |
130 | } | |
131 | else | |
132 | { | |
133 | for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) | |
134 | { | |
135 | if (dim == which) | |
136 | { | |
137 | roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); | |
138 | if (roffset == 0) | |
139 | roffset = 1; | |
140 | soffset = GFC_DESCRIPTOR_STRIDE(array,dim); | |
141 | if (soffset == 0) | |
142 | soffset = 1; | |
143 | len = GFC_DESCRIPTOR_EXTENT(array,dim); | |
144 | } | |
145 | else | |
146 | { | |
147 | count[n] = 0; | |
148 | extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); | |
149 | rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); | |
150 | sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); | |
151 | n++; | |
152 | } | |
153 | } | |
154 | if (sstride[0] == 0) | |
155 | sstride[0] = 1; | |
156 | if (rstride[0] == 0) | |
157 | rstride[0] = 1; | |
158 | ||
159 | dim = GFC_DESCRIPTOR_RANK (array); | |
c2b00cdc | 160 | } |
c2b00cdc | 161 | |
c2b00cdc TK |
162 | rstride0 = rstride[0]; |
163 | sstride0 = sstride[0]; | |
21d1335b TB |
164 | rptr = ret->base_addr; |
165 | sptr = array->base_addr; | |
c2b00cdc | 166 | |
b43645b8 MM |
167 | /* Avoid the costly modulo for trivially in-bound shifts. */ |
168 | if (shift < 0 || shift >= len) | |
169 | { | |
170 | shift = len == 0 ? 0 : shift % (ptrdiff_t)len; | |
171 | if (shift < 0) | |
172 | shift += len; | |
173 | } | |
c2b00cdc TK |
174 | |
175 | while (rptr) | |
176 | { | |
177 | /* Do the shift for this dimension. */ | |
178 | ||
179 | /* If elements are contiguous, perform the operation | |
180 | in two block moves. */ | |
181 | if (soffset == 1 && roffset == 1) | |
182 | { | |
183 | size_t len1 = shift * sizeof (GFC_REAL_4); | |
184 | size_t len2 = (len - shift) * sizeof (GFC_REAL_4); | |
185 | memcpy (rptr, sptr + shift, len2); | |
186 | memcpy (rptr + (len - shift), sptr, len1); | |
187 | } | |
188 | else | |
189 | { | |
190 | /* Otherwise, we will have to perform the copy one element at | |
191 | a time. */ | |
192 | GFC_REAL_4 *dest = rptr; | |
193 | const GFC_REAL_4 *src = &sptr[shift * soffset]; | |
194 | ||
195 | for (n = 0; n < len - shift; n++) | |
196 | { | |
197 | *dest = *src; | |
198 | dest += roffset; | |
199 | src += soffset; | |
200 | } | |
201 | for (src = sptr, n = 0; n < shift; n++) | |
202 | { | |
203 | *dest = *src; | |
204 | dest += roffset; | |
205 | src += soffset; | |
206 | } | |
207 | } | |
208 | ||
209 | /* Advance to the next section. */ | |
210 | rptr += rstride0; | |
211 | sptr += sstride0; | |
212 | count[0]++; | |
213 | n = 0; | |
214 | while (count[n] == extent[n]) | |
215 | { | |
216 | /* When we get to the end of a dimension, reset it and increment | |
217 | the next dimension. */ | |
218 | count[n] = 0; | |
219 | /* We could precalculate these products, but this is a less | |
220 | frequently used path so probably not worth it. */ | |
221 | rptr -= rstride[n] * extent[n]; | |
222 | sptr -= sstride[n] * extent[n]; | |
223 | n++; | |
224 | if (n >= dim - 1) | |
225 | { | |
226 | /* Break out of the loop. */ | |
227 | rptr = NULL; | |
228 | break; | |
229 | } | |
230 | else | |
231 | { | |
232 | count[n]++; | |
233 | rptr += rstride[n]; | |
234 | sptr += sstride[n]; | |
235 | } | |
236 | } | |
237 | } | |
238 | ||
239 | return; | |
240 | } | |
241 | ||
242 | #endif |