]>
Commit | Line | Data |
---|---|---|
4ee9c684 | 1 | /* Generic implementation of the CSHIFT intrinsic |
f1717362 | 2 | Copyright (C) 2003-2016 Free Software Foundation, Inc. |
4ee9c684 | 3 | Contributed by Feng Wang <wf_cs@yahoo.com> |
4 | ||
6625f1d7 | 5 | This file is part of the GNU Fortran runtime library (libgfortran). |
4ee9c684 | 6 | |
7 | Libgfortran is free software; you can redistribute it and/or | |
b417ea8c | 8 | modify it under the terms of the GNU General Public |
4ee9c684 | 9 | License as published by the Free Software Foundation; either |
6625f1d7 | 10 | version 3 of the License, or (at your option) any later version. |
b417ea8c | 11 | |
12 | Libgfortran is distributed in the hope that it will be useful, | |
4ee9c684 | 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
b417ea8c | 15 | GNU General Public License for more details. |
4ee9c684 | 16 | |
6625f1d7 | 17 | Under Section 7 of GPL version 3, you are granted additional |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
4ee9c684 | 25 | |
41f2d5e8 | 26 | #include "libgfortran.h" |
4ee9c684 | 27 | #include <stdlib.h> |
28 | #include <assert.h> | |
29 | #include <string.h> | |
4ee9c684 | 30 | |
4ee9c684 | 31 | static void |
820b4fbd | 32 | cshift0 (gfc_array_char * ret, const gfc_array_char * array, |
c75dca49 | 33 | ptrdiff_t shift, int which, index_type size) |
4ee9c684 | 34 | { |
35 | /* r.* indicates the return array. */ | |
9130521e | 36 | index_type rstride[GFC_MAX_DIMENSIONS]; |
4ee9c684 | 37 | index_type rstride0; |
38 | index_type roffset; | |
39 | char *rptr; | |
5ac85af2 | 40 | |
4ee9c684 | 41 | /* s.* indicates the source array. */ |
9130521e | 42 | index_type sstride[GFC_MAX_DIMENSIONS]; |
4ee9c684 | 43 | index_type sstride0; |
44 | index_type soffset; | |
45 | const char *sptr; | |
4ee9c684 | 46 | |
9130521e | 47 | index_type count[GFC_MAX_DIMENSIONS]; |
48 | index_type extent[GFC_MAX_DIMENSIONS]; | |
4ee9c684 | 49 | index_type dim; |
4ee9c684 | 50 | index_type len; |
51 | index_type n; | |
74a175c1 | 52 | index_type arraysize; |
4ee9c684 | 53 | |
95f15c5b | 54 | index_type type_size; |
55 | ||
4ee9c684 | 56 | if (which < 1 || which > GFC_DESCRIPTOR_RANK (array)) |
57 | runtime_error ("Argument 'DIM' is out of range in call to 'CSHIFT'"); | |
58 | ||
74a175c1 | 59 | arraysize = size0 ((array_t *) array); |
60 | ||
553877d9 | 61 | if (ret->base_addr == NULL) |
74a175c1 | 62 | { |
63 | int i; | |
64 | ||
65 | ret->offset = 0; | |
66 | ret->dtype = array->dtype; | |
67 | for (i = 0; i < GFC_DESCRIPTOR_RANK (array); i++) | |
68 | { | |
827aef63 | 69 | index_type ub, str; |
70 | ||
71 | ub = GFC_DESCRIPTOR_EXTENT(array,i) - 1; | |
74a175c1 | 72 | |
73 | if (i == 0) | |
827aef63 | 74 | str = 1; |
74a175c1 | 75 | else |
827aef63 | 76 | str = GFC_DESCRIPTOR_EXTENT(ret,i-1) * |
77 | GFC_DESCRIPTOR_STRIDE(ret,i-1); | |
78 | ||
79 | GFC_DIMENSION_SET(ret->dim[i], 0, ub, str); | |
74a175c1 | 80 | } |
81 | ||
af1e9051 | 82 | /* xmallocarray allocates a single byte for zero size. */ |
83 | ret->base_addr = xmallocarray (arraysize, size); | |
74a175c1 | 84 | } |
5d04d450 | 85 | else if (unlikely (compile_options.bounds_check)) |
86 | { | |
87 | bounds_equal_extents ((array_t *) ret, (array_t *) array, | |
88 | "return value", "CSHIFT"); | |
89 | } | |
90 | ||
74a175c1 | 91 | if (arraysize == 0) |
92 | return; | |
5d04d450 | 93 | |
95f15c5b | 94 | type_size = GFC_DTYPE_TYPE_SIZE (array); |
74a175c1 | 95 | |
95f15c5b | 96 | switch(type_size) |
97 | { | |
98 | case GFC_DTYPE_LOGICAL_1: | |
99 | case GFC_DTYPE_INTEGER_1: | |
100 | case GFC_DTYPE_DERIVED_1: | |
101 | cshift0_i1 ((gfc_array_i1 *)ret, (gfc_array_i1 *) array, shift, which); | |
102 | return; | |
103 | ||
104 | case GFC_DTYPE_LOGICAL_2: | |
105 | case GFC_DTYPE_INTEGER_2: | |
106 | cshift0_i2 ((gfc_array_i2 *)ret, (gfc_array_i2 *) array, shift, which); | |
107 | return; | |
108 | ||
109 | case GFC_DTYPE_LOGICAL_4: | |
110 | case GFC_DTYPE_INTEGER_4: | |
111 | cshift0_i4 ((gfc_array_i4 *)ret, (gfc_array_i4 *) array, shift, which); | |
112 | return; | |
113 | ||
114 | case GFC_DTYPE_LOGICAL_8: | |
115 | case GFC_DTYPE_INTEGER_8: | |
116 | cshift0_i8 ((gfc_array_i8 *)ret, (gfc_array_i8 *) array, shift, which); | |
117 | return; | |
118 | ||
119 | #ifdef HAVE_GFC_INTEGER_16 | |
120 | case GFC_DTYPE_LOGICAL_16: | |
121 | case GFC_DTYPE_INTEGER_16: | |
52e3f42a | 122 | cshift0_i16 ((gfc_array_i16 *)ret, (gfc_array_i16 *) array, shift, |
95f15c5b | 123 | which); |
124 | return; | |
125 | #endif | |
126 | ||
127 | case GFC_DTYPE_REAL_4: | |
128 | cshift0_r4 ((gfc_array_r4 *)ret, (gfc_array_r4 *) array, shift, which); | |
129 | return; | |
130 | ||
131 | case GFC_DTYPE_REAL_8: | |
132 | cshift0_r8 ((gfc_array_r8 *)ret, (gfc_array_r8 *) array, shift, which); | |
133 | return; | |
134 | ||
87969c8c | 135 | /* FIXME: This here is a hack, which will have to be removed when |
136 | the array descriptor is reworked. Currently, we don't store the | |
137 | kind value for the type, but only the size. Because on targets with | |
138 | __float128, we have sizeof(logn double) == sizeof(__float128), | |
139 | we cannot discriminate here and have to fall back to the generic | |
140 | handling (which is suboptimal). */ | |
141 | #if !defined(GFC_REAL_16_IS_FLOAT128) | |
142 | # ifdef HAVE_GFC_REAL_10 | |
95f15c5b | 143 | case GFC_DTYPE_REAL_10: |
144 | cshift0_r10 ((gfc_array_r10 *)ret, (gfc_array_r10 *) array, shift, | |
145 | which); | |
146 | return; | |
87969c8c | 147 | # endif |
95f15c5b | 148 | |
87969c8c | 149 | # ifdef HAVE_GFC_REAL_16 |
95f15c5b | 150 | case GFC_DTYPE_REAL_16: |
151 | cshift0_r16 ((gfc_array_r16 *)ret, (gfc_array_r16 *) array, shift, | |
152 | which); | |
153 | return; | |
87969c8c | 154 | # endif |
95f15c5b | 155 | #endif |
156 | ||
157 | case GFC_DTYPE_COMPLEX_4: | |
158 | cshift0_c4 ((gfc_array_c4 *)ret, (gfc_array_c4 *) array, shift, which); | |
159 | return; | |
160 | ||
161 | case GFC_DTYPE_COMPLEX_8: | |
162 | cshift0_c8 ((gfc_array_c8 *)ret, (gfc_array_c8 *) array, shift, which); | |
163 | return; | |
164 | ||
87969c8c | 165 | /* FIXME: This here is a hack, which will have to be removed when |
166 | the array descriptor is reworked. Currently, we don't store the | |
167 | kind value for the type, but only the size. Because on targets with | |
168 | __float128, we have sizeof(logn double) == sizeof(__float128), | |
169 | we cannot discriminate here and have to fall back to the generic | |
170 | handling (which is suboptimal). */ | |
171 | #if !defined(GFC_REAL_16_IS_FLOAT128) | |
172 | # ifdef HAVE_GFC_COMPLEX_10 | |
95f15c5b | 173 | case GFC_DTYPE_COMPLEX_10: |
174 | cshift0_c10 ((gfc_array_c10 *)ret, (gfc_array_c10 *) array, shift, | |
175 | which); | |
176 | return; | |
87969c8c | 177 | # endif |
95f15c5b | 178 | |
87969c8c | 179 | # ifdef HAVE_GFC_COMPLEX_16 |
95f15c5b | 180 | case GFC_DTYPE_COMPLEX_16: |
181 | cshift0_c16 ((gfc_array_c16 *)ret, (gfc_array_c16 *) array, shift, | |
182 | which); | |
183 | return; | |
87969c8c | 184 | # endif |
95f15c5b | 185 | #endif |
4ee9c684 | 186 | |
95f15c5b | 187 | default: |
188 | break; | |
189 | } | |
4ee9c684 | 190 | |
95f15c5b | 191 | switch (size) |
1c03ad1f | 192 | { |
95f15c5b | 193 | /* Let's check the actual alignment of the data pointers. If they |
194 | are suitably aligned, we can safely call the unpack functions. */ | |
1c03ad1f | 195 | |
95f15c5b | 196 | case sizeof (GFC_INTEGER_1): |
197 | cshift0_i1 ((gfc_array_i1 *) ret, (gfc_array_i1 *) array, shift, | |
198 | which); | |
1c03ad1f | 199 | break; |
200 | ||
95f15c5b | 201 | case sizeof (GFC_INTEGER_2): |
553877d9 | 202 | if (GFC_UNALIGNED_2(ret->base_addr) || GFC_UNALIGNED_2(array->base_addr)) |
95f15c5b | 203 | break; |
204 | else | |
205 | { | |
206 | cshift0_i2 ((gfc_array_i2 *) ret, (gfc_array_i2 *) array, shift, | |
207 | which); | |
208 | return; | |
209 | } | |
210 | ||
211 | case sizeof (GFC_INTEGER_4): | |
553877d9 | 212 | if (GFC_UNALIGNED_4(ret->base_addr) || GFC_UNALIGNED_4(array->base_addr)) |
95f15c5b | 213 | break; |
214 | else | |
215 | { | |
216 | cshift0_i4 ((gfc_array_i4 *)ret, (gfc_array_i4 *) array, shift, | |
217 | which); | |
218 | return; | |
219 | } | |
220 | ||
221 | case sizeof (GFC_INTEGER_8): | |
553877d9 | 222 | if (GFC_UNALIGNED_8(ret->base_addr) || GFC_UNALIGNED_8(array->base_addr)) |
95f15c5b | 223 | { |
224 | /* Let's try to use the complex routines. First, a sanity | |
225 | check that the sizes match; this should be optimized to | |
226 | a no-op. */ | |
227 | if (sizeof(GFC_INTEGER_8) != sizeof(GFC_COMPLEX_4)) | |
228 | break; | |
229 | ||
553877d9 | 230 | if (GFC_UNALIGNED_C4(ret->base_addr) |
231 | || GFC_UNALIGNED_C4(array->base_addr)) | |
95f15c5b | 232 | break; |
233 | ||
234 | cshift0_c4 ((gfc_array_c4 *) ret, (gfc_array_c4 *) array, shift, | |
235 | which); | |
83c813ac | 236 | return; |
95f15c5b | 237 | } |
238 | else | |
239 | { | |
240 | cshift0_i8 ((gfc_array_i8 *)ret, (gfc_array_i8 *) array, shift, | |
241 | which); | |
242 | return; | |
243 | } | |
244 | ||
245 | #ifdef HAVE_GFC_INTEGER_16 | |
246 | case sizeof (GFC_INTEGER_16): | |
553877d9 | 247 | if (GFC_UNALIGNED_16(ret->base_addr) |
248 | || GFC_UNALIGNED_16(array->base_addr)) | |
95f15c5b | 249 | { |
250 | /* Let's try to use the complex routines. First, a sanity | |
251 | check that the sizes match; this should be optimized to | |
252 | a no-op. */ | |
52e3f42a | 253 | if (sizeof(GFC_INTEGER_16) != sizeof(GFC_COMPLEX_8)) |
95f15c5b | 254 | break; |
255 | ||
553877d9 | 256 | if (GFC_UNALIGNED_C8(ret->base_addr) |
257 | || GFC_UNALIGNED_C8(array->base_addr)) | |
95f15c5b | 258 | break; |
259 | ||
260 | cshift0_c8 ((gfc_array_c8 *) ret, (gfc_array_c8 *) array, shift, | |
261 | which); | |
83c813ac | 262 | return; |
95f15c5b | 263 | } |
264 | else | |
265 | { | |
266 | cshift0_i16 ((gfc_array_i16 *) ret, (gfc_array_i16 *) array, | |
267 | shift, which); | |
268 | return; | |
269 | } | |
270 | #else | |
271 | case sizeof (GFC_COMPLEX_8): | |
272 | ||
553877d9 | 273 | if (GFC_UNALIGNED_C8(ret->base_addr) |
274 | || GFC_UNALIGNED_C8(array->base_addr)) | |
95f15c5b | 275 | break; |
276 | else | |
277 | { | |
278 | cshift0_c8 ((gfc_array_c8 *) ret, (gfc_array_c8 *) array, shift, | |
279 | which); | |
280 | return; | |
281 | } | |
282 | #endif | |
283 | ||
1c03ad1f | 284 | default: |
285 | break; | |
286 | } | |
287 | ||
95f15c5b | 288 | |
289 | which = which - 1; | |
290 | sstride[0] = 0; | |
291 | rstride[0] = 0; | |
292 | ||
293 | extent[0] = 1; | |
294 | count[0] = 0; | |
295 | n = 0; | |
5ac85af2 | 296 | /* Initialized for avoiding compiler warnings. */ |
4ee9c684 | 297 | roffset = size; |
298 | soffset = size; | |
299 | len = 0; | |
300 | ||
301 | for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) | |
302 | { | |
303 | if (dim == which) | |
304 | { | |
827aef63 | 305 | roffset = GFC_DESCRIPTOR_STRIDE_BYTES(ret,dim); |
4ee9c684 | 306 | if (roffset == 0) |
307 | roffset = size; | |
827aef63 | 308 | soffset = GFC_DESCRIPTOR_STRIDE_BYTES(array,dim); |
4ee9c684 | 309 | if (soffset == 0) |
310 | soffset = size; | |
827aef63 | 311 | len = GFC_DESCRIPTOR_EXTENT(array,dim); |
4ee9c684 | 312 | } |
313 | else | |
314 | { | |
315 | count[n] = 0; | |
827aef63 | 316 | extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); |
317 | rstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(ret,dim); | |
318 | sstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(array,dim); | |
4ee9c684 | 319 | n++; |
320 | } | |
321 | } | |
322 | if (sstride[0] == 0) | |
323 | sstride[0] = size; | |
324 | if (rstride[0] == 0) | |
325 | rstride[0] = size; | |
326 | ||
327 | dim = GFC_DESCRIPTOR_RANK (array); | |
328 | rstride0 = rstride[0]; | |
329 | sstride0 = sstride[0]; | |
553877d9 | 330 | rptr = ret->base_addr; |
331 | sptr = array->base_addr; | |
4ee9c684 | 332 | |
c75dca49 | 333 | shift = len == 0 ? 0 : shift % (ptrdiff_t)len; |
4ee9c684 | 334 | if (shift < 0) |
335 | shift += len; | |
336 | ||
337 | while (rptr) | |
338 | { | |
339 | /* Do the shift for this dimension. */ | |
5ac85af2 | 340 | |
341 | /* If elements are contiguous, perform the operation | |
342 | in two block moves. */ | |
343 | if (soffset == size && roffset == size) | |
344 | { | |
345 | size_t len1 = shift * size; | |
346 | size_t len2 = (len - shift) * size; | |
347 | memcpy (rptr, sptr + len1, len2); | |
348 | memcpy (rptr + len2, sptr, len1); | |
349 | } | |
350 | else | |
351 | { | |
352 | /* Otherwise, we'll have to perform the copy one element at | |
95f15c5b | 353 | a time. */ |
354 | char *dest = rptr; | |
355 | const char *src = &sptr[shift * soffset]; | |
356 | ||
357 | for (n = 0; n < len - shift; n++) | |
358 | { | |
359 | memcpy (dest, src, size); | |
360 | dest += roffset; | |
361 | src += soffset; | |
362 | } | |
363 | for (src = sptr, n = 0; n < shift; n++) | |
5ac85af2 | 364 | { |
95f15c5b | 365 | memcpy (dest, src, size); |
366 | dest += roffset; | |
367 | src += soffset; | |
5ac85af2 | 368 | } |
369 | } | |
4ee9c684 | 370 | |
371 | /* Advance to the next section. */ | |
372 | rptr += rstride0; | |
373 | sptr += sstride0; | |
374 | count[0]++; | |
375 | n = 0; | |
376 | while (count[n] == extent[n]) | |
377 | { | |
378 | /* When we get to the end of a dimension, reset it and increment | |
379 | the next dimension. */ | |
380 | count[n] = 0; | |
381 | /* We could precalculate these products, but this is a less | |
a2ffc2c4 | 382 | frequently used path so probably not worth it. */ |
4ee9c684 | 383 | rptr -= rstride[n] * extent[n]; |
384 | sptr -= sstride[n] * extent[n]; | |
385 | n++; | |
386 | if (n >= dim - 1) | |
387 | { | |
388 | /* Break out of the loop. */ | |
389 | rptr = NULL; | |
390 | break; | |
391 | } | |
392 | else | |
393 | { | |
394 | count[n]++; | |
395 | rptr += rstride[n]; | |
396 | sptr += sstride[n]; | |
397 | } | |
398 | } | |
399 | } | |
400 | } | |
401 | ||
1a9a4a12 | 402 | #define DEFINE_CSHIFT(N) \ |
403 | extern void cshift0_##N (gfc_array_char *, const gfc_array_char *, \ | |
404 | const GFC_INTEGER_##N *, const GFC_INTEGER_##N *); \ | |
405 | export_proto(cshift0_##N); \ | |
406 | \ | |
407 | void \ | |
408 | cshift0_##N (gfc_array_char *ret, const gfc_array_char *array, \ | |
409 | const GFC_INTEGER_##N *pshift, const GFC_INTEGER_##N *pdim) \ | |
410 | { \ | |
411 | cshift0 (ret, array, *pshift, pdim ? *pdim : 1, \ | |
412 | GFC_DESCRIPTOR_SIZE (array)); \ | |
413 | } \ | |
414 | \ | |
415 | extern void cshift0_##N##_char (gfc_array_char *, GFC_INTEGER_4, \ | |
416 | const gfc_array_char *, \ | |
417 | const GFC_INTEGER_##N *, \ | |
418 | const GFC_INTEGER_##N *, GFC_INTEGER_4); \ | |
419 | export_proto(cshift0_##N##_char); \ | |
420 | \ | |
421 | void \ | |
422 | cshift0_##N##_char (gfc_array_char *ret, \ | |
423 | GFC_INTEGER_4 ret_length __attribute__((unused)), \ | |
424 | const gfc_array_char *array, \ | |
425 | const GFC_INTEGER_##N *pshift, \ | |
426 | const GFC_INTEGER_##N *pdim, \ | |
427 | GFC_INTEGER_4 array_length) \ | |
428 | { \ | |
429 | cshift0 (ret, array, *pshift, pdim ? *pdim : 1, array_length); \ | |
329f13ad | 430 | } \ |
431 | \ | |
432 | extern void cshift0_##N##_char4 (gfc_array_char *, GFC_INTEGER_4, \ | |
433 | const gfc_array_char *, \ | |
434 | const GFC_INTEGER_##N *, \ | |
435 | const GFC_INTEGER_##N *, GFC_INTEGER_4); \ | |
436 | export_proto(cshift0_##N##_char4); \ | |
437 | \ | |
438 | void \ | |
439 | cshift0_##N##_char4 (gfc_array_char *ret, \ | |
440 | GFC_INTEGER_4 ret_length __attribute__((unused)), \ | |
441 | const gfc_array_char *array, \ | |
442 | const GFC_INTEGER_##N *pshift, \ | |
443 | const GFC_INTEGER_##N *pdim, \ | |
444 | GFC_INTEGER_4 array_length) \ | |
445 | { \ | |
446 | cshift0 (ret, array, *pshift, pdim ? *pdim : 1, \ | |
447 | array_length * sizeof (gfc_char4_t)); \ | |
1a9a4a12 | 448 | } |
449 | ||
450 | DEFINE_CSHIFT (1); | |
451 | DEFINE_CSHIFT (2); | |
452 | DEFINE_CSHIFT (4); | |
453 | DEFINE_CSHIFT (8); | |
914c6756 | 454 | #ifdef HAVE_GFC_INTEGER_16 |
455 | DEFINE_CSHIFT (16); | |
456 | #endif |