]>
Commit | Line | Data |
---|---|---|
6de9cd9a | 1 | /* Implementation of the RANDOM intrinsics |
cbe34bb5 | 2 | Copyright (C) 2002-2017 Free Software Foundation, Inc. |
b152f5a2 JB |
3 | Contributed by Lars Segerlund <seger@linuxmail.org>, |
4 | Steve Kargl and Janne Blomqvist. | |
6de9cd9a | 5 | |
21d1335b | 6 | This file is part of the GNU Fortran runtime library (libgfortran). |
6de9cd9a DN |
7 | |
8 | Libgfortran is free software; you can redistribute it and/or | |
57dea9f6 | 9 | modify it under the terms of the GNU General Public |
6de9cd9a | 10 | License as published by the Free Software Foundation; either |
748086b7 | 11 | version 3 of the License, or (at your option) any later version. |
6de9cd9a DN |
12 | |
13 | Ligbfortran is distributed in the hope that it will be useful, | |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
57dea9f6 | 16 | GNU General Public License for more details. |
6de9cd9a | 17 | |
748086b7 JJ |
18 | Under Section 7 of GPL version 3, you are granted additional |
19 | permissions described in the GCC Runtime Library Exception, version | |
20 | 3.1, as published by the Free Software Foundation. | |
21 | ||
22 | You should have received a copy of the GNU General Public License and | |
23 | a copy of the GCC Runtime Library Exception along with this program; | |
24 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
25 | <http://www.gnu.org/licenses/>. */ | |
6de9cd9a | 26 | |
9ad5c32a JB |
27 | /* For rand_s. */ |
28 | #define _CRT_RAND_S | |
29 | ||
7d7b8bfe | 30 | #include "libgfortran.h" |
7606c786 | 31 | #include <gthr.h> |
34b4bc5c | 32 | #include <string.h> |
7d7b8bfe | 33 | |
b152f5a2 JB |
34 | #ifdef HAVE_UNISTD_H |
35 | #include <unistd.h> | |
36 | #endif | |
37 | #include <sys/stat.h> | |
38 | #include <fcntl.h> | |
39 | #include "time_1.h" | |
40 | ||
41 | #ifdef __MINGW32__ | |
42 | #define HAVE_GETPID 1 | |
43 | #include <process.h> | |
9ad5c32a | 44 | #include <_mingw.h> /* For __MINGW64_VERSION_MAJOR */ |
b152f5a2 JB |
45 | #endif |
46 | ||
7d7b8bfe RH |
47 | extern void random_r4 (GFC_REAL_4 *); |
48 | iexport_proto(random_r4); | |
49 | ||
50 | extern void random_r8 (GFC_REAL_8 *); | |
51 | iexport_proto(random_r8); | |
52 | ||
53 | extern void arandom_r4 (gfc_array_r4 *); | |
54 | export_proto(arandom_r4); | |
55 | ||
56 | extern void arandom_r8 (gfc_array_r8 *); | |
57 | export_proto(arandom_r8); | |
58 | ||
cdc5524f TK |
59 | #ifdef HAVE_GFC_REAL_10 |
60 | ||
61 | extern void random_r10 (GFC_REAL_10 *); | |
62 | iexport_proto(random_r10); | |
63 | ||
64 | extern void arandom_r10 (gfc_array_r10 *); | |
65 | export_proto(arandom_r10); | |
66 | ||
67 | #endif | |
68 | ||
69 | #ifdef HAVE_GFC_REAL_16 | |
70 | ||
71 | extern void random_r16 (GFC_REAL_16 *); | |
72 | iexport_proto(random_r16); | |
73 | ||
74 | extern void arandom_r16 (gfc_array_r16 *); | |
75 | export_proto(arandom_r16); | |
76 | ||
77 | #endif | |
78 | ||
5e805e44 JJ |
79 | #ifdef __GTHREAD_MUTEX_INIT |
80 | static __gthread_mutex_t random_lock = __GTHREAD_MUTEX_INIT; | |
81 | #else | |
82 | static __gthread_mutex_t random_lock; | |
83 | #endif | |
84 | ||
cdc5524f TK |
85 | /* Helper routines to map a GFC_UINTEGER_* to the corresponding |
86 | GFC_REAL_* types in the range of [0,1). If GFC_REAL_*_RADIX are 2 | |
87 | or 16, respectively, we mask off the bits that don't fit into the | |
88 | correct GFC_REAL_*, convert to the real type, then multiply by the | |
1b867ae7 | 89 | correct offset. */ |
cdc5524f TK |
90 | |
91 | ||
992b0aa1 | 92 | static void |
cdc5524f TK |
93 | rnumber_4 (GFC_REAL_4 *f, GFC_UINTEGER_4 v) |
94 | { | |
95 | GFC_UINTEGER_4 mask; | |
96 | #if GFC_REAL_4_RADIX == 2 | |
97 | mask = ~ (GFC_UINTEGER_4) 0u << (32 - GFC_REAL_4_DIGITS); | |
98 | #elif GFC_REAL_4_RADIX == 16 | |
99 | mask = ~ (GFC_UINTEGER_4) 0u << ((8 - GFC_REAL_4_DIGITS) * 4); | |
100 | #else | |
101 | #error "GFC_REAL_4_RADIX has unknown value" | |
102 | #endif | |
103 | v = v & mask; | |
a83169cd | 104 | *f = (GFC_REAL_4) v * GFC_REAL_4_LITERAL(0x1.p-32); |
cdc5524f TK |
105 | } |
106 | ||
992b0aa1 | 107 | static void |
cdc5524f TK |
108 | rnumber_8 (GFC_REAL_8 *f, GFC_UINTEGER_8 v) |
109 | { | |
110 | GFC_UINTEGER_8 mask; | |
111 | #if GFC_REAL_8_RADIX == 2 | |
112 | mask = ~ (GFC_UINTEGER_8) 0u << (64 - GFC_REAL_8_DIGITS); | |
113 | #elif GFC_REAL_8_RADIX == 16 | |
114 | mask = ~ (GFC_UINTEGER_8) 0u << (16 - GFC_REAL_8_DIGITS) * 4); | |
115 | #else | |
116 | #error "GFC_REAL_8_RADIX has unknown value" | |
117 | #endif | |
118 | v = v & mask; | |
a83169cd | 119 | *f = (GFC_REAL_8) v * GFC_REAL_8_LITERAL(0x1.p-64); |
cdc5524f TK |
120 | } |
121 | ||
122 | #ifdef HAVE_GFC_REAL_10 | |
5f251c26 | 123 | |
992b0aa1 | 124 | static void |
cdc5524f TK |
125 | rnumber_10 (GFC_REAL_10 *f, GFC_UINTEGER_8 v) |
126 | { | |
127 | GFC_UINTEGER_8 mask; | |
128 | #if GFC_REAL_10_RADIX == 2 | |
129 | mask = ~ (GFC_UINTEGER_8) 0u << (64 - GFC_REAL_10_DIGITS); | |
130 | #elif GFC_REAL_10_RADIX == 16 | |
131 | mask = ~ (GFC_UINTEGER_10) 0u << ((16 - GFC_REAL_10_DIGITS) * 4); | |
132 | #else | |
133 | #error "GFC_REAL_10_RADIX has unknown value" | |
134 | #endif | |
135 | v = v & mask; | |
a83169cd | 136 | *f = (GFC_REAL_10) v * GFC_REAL_10_LITERAL(0x1.p-64); |
cdc5524f TK |
137 | } |
138 | #endif | |
139 | ||
140 | #ifdef HAVE_GFC_REAL_16 | |
141 | ||
142 | /* For REAL(KIND=16), we only need to mask off the lower bits. */ | |
143 | ||
992b0aa1 | 144 | static void |
cdc5524f TK |
145 | rnumber_16 (GFC_REAL_16 *f, GFC_UINTEGER_8 v1, GFC_UINTEGER_8 v2) |
146 | { | |
147 | GFC_UINTEGER_8 mask; | |
148 | #if GFC_REAL_16_RADIX == 2 | |
149 | mask = ~ (GFC_UINTEGER_8) 0u << (128 - GFC_REAL_16_DIGITS); | |
150 | #elif GFC_REAL_16_RADIX == 16 | |
151 | mask = ~ (GFC_UINTEGER_8) 0u << ((32 - GFC_REAL_16_DIGITS) * 4); | |
152 | #else | |
153 | #error "GFC_REAL_16_RADIX has unknown value" | |
154 | #endif | |
155 | v2 = v2 & mask; | |
a83169cd FXC |
156 | *f = (GFC_REAL_16) v1 * GFC_REAL_16_LITERAL(0x1.p-64) |
157 | + (GFC_REAL_16) v2 * GFC_REAL_16_LITERAL(0x1.p-128); | |
cdc5524f TK |
158 | } |
159 | #endif | |
cdc5524f | 160 | |
cdc5524f | 161 | |
b152f5a2 JB |
162 | /* |
163 | ||
164 | We use the xorshift1024* generator, a fast high-quality generator | |
165 | that: | |
166 | ||
167 | - passes TestU1 without any failures | |
168 | ||
169 | - provides a "jump" function making it easy to provide many | |
170 | independent parallel streams. | |
171 | ||
172 | - Long period of 2**1024 - 1 | |
173 | ||
174 | A description can be found at | |
175 | ||
176 | http://vigna.di.unimi.it/ftp/papers/xorshift.pdf | |
177 | ||
178 | or | |
179 | ||
180 | http://arxiv.org/abs/1402.6246 | |
181 | ||
182 | The paper includes public domain source code which is the basis for | |
183 | the implementation below. | |
184 | ||
185 | */ | |
186 | typedef struct | |
187 | { | |
188 | bool init; | |
189 | int p; | |
190 | uint64_t s[16]; | |
191 | } | |
192 | xorshift1024star_state; | |
193 | ||
194 | ||
91151a73 JB |
195 | /* master_init, njumps, and master_state are the only variables |
196 | protected by random_lock. */ | |
197 | static bool master_init; | |
198 | static unsigned njumps; /* How many times we have jumped. */ | |
b152f5a2 JB |
199 | static uint64_t master_state[] = { |
200 | 0xad63fa1ed3b55f36ULL, 0xd94473e78978b497ULL, 0xbc60592a98172477ULL, | |
201 | 0xa3de7c6e81265301ULL, 0x586640c5e785af27ULL, 0x7a2a3f63b67ce5eaULL, | |
202 | 0x9fde969f922d9b82ULL, 0xe6fe34379b3f3822ULL, 0x6c277eac3e99b6c2ULL, | |
203 | 0x9197290ab0d3f069ULL, 0xdb227302f6c25576ULL, 0xee0209aee527fae9ULL, | |
204 | 0x675666a793cd05b9ULL, 0xd048c99fbc70c20fULL, 0x775f8c3dba385ef5ULL, | |
205 | 0x625288bc262faf33ULL | |
cdc5524f TK |
206 | }; |
207 | ||
cdc5524f | 208 | |
b152f5a2 | 209 | static __gthread_key_t rand_state_key; |
cdc5524f | 210 | |
b152f5a2 JB |
211 | static xorshift1024star_state* |
212 | get_rand_state (void) | |
213 | { | |
214 | /* For single threaded apps. */ | |
215 | static xorshift1024star_state rand_state; | |
216 | ||
217 | if (__gthread_active_p ()) | |
218 | { | |
219 | void* p = __gthread_getspecific (rand_state_key); | |
220 | if (!p) | |
221 | { | |
222 | p = xcalloc (1, sizeof (xorshift1024star_state)); | |
223 | __gthread_setspecific (rand_state_key, p); | |
224 | } | |
225 | return p; | |
226 | } | |
227 | else | |
228 | return &rand_state; | |
229 | } | |
5f251c26 | 230 | |
5f251c26 | 231 | |
b152f5a2 JB |
232 | static uint64_t |
233 | xorshift1024star (xorshift1024star_state* rs) | |
5f251c26 | 234 | { |
b152f5a2 JB |
235 | int p = rs->p; |
236 | const uint64_t s0 = rs->s[p]; | |
237 | uint64_t s1 = rs->s[p = (p + 1) & 15]; | |
238 | s1 ^= s1 << 31; | |
239 | rs->s[p] = s1 ^ s0 ^ (s1 >> 11) ^ (s0 >> 30); | |
240 | rs->p = p; | |
241 | return rs->s[p] * UINT64_C(1181783497276652981); | |
242 | } | |
5f251c26 | 243 | |
5f251c26 | 244 | |
b152f5a2 JB |
245 | /* This is the jump function for the generator. It is equivalent to |
246 | 2^512 calls to xorshift1024star(); it can be used to generate 2^512 | |
247 | non-overlapping subsequences for parallel computations. */ | |
248 | ||
249 | static void | |
250 | jump (xorshift1024star_state* rs) | |
251 | { | |
252 | static const uint64_t JUMP[] = { | |
253 | 0x84242f96eca9c41dULL, 0xa3c65b8776f96855ULL, 0x5b34a39f070b5837ULL, | |
254 | 0x4489affce4f31a1eULL, 0x2ffeeb0a48316f40ULL, 0xdc2d9891fe68c022ULL, | |
255 | 0x3659132bb12fea70ULL, 0xaac17d8efa43cab8ULL, 0xc4cb815590989b13ULL, | |
256 | 0x5ee975283d71c93bULL, 0x691548c86c1bd540ULL, 0x7910c41d10a1e6a5ULL, | |
257 | 0x0b5fc64563b3e2a8ULL, 0x047f7684e9fc949dULL, 0xb99181f2d8f685caULL, | |
258 | 0x284600e3f30e38c3ULL | |
259 | }; | |
260 | ||
261 | uint64_t t[16] = { 0 }; | |
262 | for(unsigned int i = 0; i < sizeof JUMP / sizeof *JUMP; i++) | |
263 | for(int b = 0; b < 64; b++) | |
264 | { | |
265 | if (JUMP[i] & 1ULL << b) | |
266 | for(int j = 0; j < 16; j++) | |
267 | t[j] ^= rs->s[(j + rs->p) & 15]; | |
268 | xorshift1024star (rs); | |
269 | } | |
270 | for(int j = 0; j < 16; j++) | |
271 | rs->s[(j + rs->p) & 15] = t[j]; | |
5f251c26 SK |
272 | } |
273 | ||
b152f5a2 | 274 | |
b152f5a2 JB |
275 | /* Super-simple LCG generator used in getosrandom () if /dev/urandom |
276 | doesn't exist. */ | |
277 | ||
278 | #define M 2147483647 /* 2^31 - 1 (A large prime number) */ | |
279 | #define A 16807 /* Prime root of M, passes statistical tests and produces a full cycle */ | |
280 | #define Q 127773 /* M / A (To avoid overflow on A * seed) */ | |
281 | #define R 2836 /* M % A (To avoid overflow on A * seed) */ | |
282 | ||
9ad5c32a | 283 | __attribute__((unused)) static uint32_t |
b152f5a2 JB |
284 | lcg_parkmiller(uint32_t seed) |
285 | { | |
286 | uint32_t hi = seed / Q; | |
287 | uint32_t lo = seed % Q; | |
288 | int32_t test = A * lo - R * hi; | |
289 | if (test <= 0) | |
290 | test += M; | |
291 | return test; | |
292 | } | |
293 | ||
294 | #undef M | |
295 | #undef A | |
296 | #undef Q | |
297 | #undef R | |
298 | ||
9ad5c32a | 299 | |
b152f5a2 JB |
300 | /* Get some random bytes from the operating system in order to seed |
301 | the PRNG. */ | |
302 | ||
303 | static int | |
304 | getosrandom (void *buf, size_t buflen) | |
305 | { | |
9ad5c32a | 306 | /* rand_s is available in MinGW-w64 but not plain MinGW. */ |
9449b700 | 307 | #if defined(__MINGW64_VERSION_MAJOR) && !defined(__CYGWIN__) |
9ad5c32a JB |
308 | unsigned int* b = buf; |
309 | for (unsigned i = 0; i < buflen / sizeof (unsigned int); i++) | |
310 | rand_s (&b[i]); | |
311 | return buflen; | |
312 | #else | |
313 | /* | |
b152f5a2 JB |
314 | TODO: When glibc adds a wrapper for the getrandom() system call |
315 | on Linux, one could use that. | |
316 | ||
317 | TODO: One could use getentropy() on OpenBSD. */ | |
318 | int flags = O_RDONLY; | |
319 | #ifdef O_CLOEXEC | |
320 | flags |= O_CLOEXEC; | |
321 | #endif | |
322 | int fd = open("/dev/urandom", flags); | |
323 | if (fd != -1) | |
324 | { | |
325 | int res = read(fd, buf, buflen); | |
326 | close (fd); | |
327 | return res; | |
328 | } | |
329 | uint32_t seed = 1234567890; | |
330 | time_t secs; | |
331 | long usecs; | |
332 | if (gf_gettime (&secs, &usecs) == 0) | |
333 | { | |
334 | seed ^= secs; | |
335 | seed ^= usecs; | |
336 | } | |
337 | #ifdef HAVE_GETPID | |
338 | pid_t pid = getpid(); | |
339 | seed ^= pid; | |
340 | #endif | |
341 | uint32_t* ub = buf; | |
9ad5c32a | 342 | for (size_t i = 0; i < buflen / sizeof (uint32_t); i++) |
b152f5a2 JB |
343 | { |
344 | ub[i] = seed; | |
345 | seed = lcg_parkmiller (seed); | |
346 | } | |
347 | return buflen; | |
9ad5c32a | 348 | #endif /* __MINGW64_VERSION_MAJOR */ |
b152f5a2 JB |
349 | } |
350 | ||
351 | ||
91151a73 JB |
352 | /* Initialize the random number generator for the current thread, |
353 | using the master state and the number of times we must jump. */ | |
354 | ||
355 | static void | |
356 | init_rand_state (xorshift1024star_state* rs, const bool locked) | |
357 | { | |
358 | if (!locked) | |
359 | __gthread_mutex_lock (&random_lock); | |
360 | if (!master_init) | |
361 | { | |
362 | getosrandom (master_state, sizeof (master_state)); | |
363 | njumps = 0; | |
364 | master_init = true; | |
365 | } | |
366 | memcpy (&rs->s, master_state, sizeof (master_state)); | |
367 | unsigned n = njumps++; | |
368 | if (!locked) | |
369 | __gthread_mutex_unlock (&random_lock); | |
370 | for (unsigned i = 0; i < n; i++) | |
371 | jump (rs); | |
372 | rs->init = true; | |
373 | } | |
374 | ||
375 | ||
cdaa9fc4 | 376 | /* This function produces a REAL(4) value from the uniform distribution |
5f251c26 SK |
377 | with range [0,1). */ |
378 | ||
379 | void | |
7d7b8bfe | 380 | random_r4 (GFC_REAL_4 *x) |
5f251c26 | 381 | { |
b152f5a2 JB |
382 | xorshift1024star_state* rs = get_rand_state(); |
383 | ||
384 | if (unlikely (!rs->init)) | |
385 | init_rand_state (rs, false); | |
386 | uint64_t r = xorshift1024star (rs); | |
387 | /* Take the higher bits, ensuring that a stream of real(4), real(8), | |
388 | and real(10) will be identical (except for precision). */ | |
389 | uint32_t high = (uint32_t) (r >> 32); | |
390 | rnumber_4 (x, high); | |
5f251c26 | 391 | } |
7d7b8bfe | 392 | iexport(random_r4); |
5f251c26 SK |
393 | |
394 | /* This function produces a REAL(8) value from the uniform distribution | |
395 | with range [0,1). */ | |
396 | ||
397 | void | |
7d7b8bfe | 398 | random_r8 (GFC_REAL_8 *x) |
5f251c26 | 399 | { |
b152f5a2 JB |
400 | GFC_UINTEGER_8 r; |
401 | xorshift1024star_state* rs = get_rand_state(); | |
5f251c26 | 402 | |
b152f5a2 JB |
403 | if (unlikely (!rs->init)) |
404 | init_rand_state (rs, false); | |
405 | r = xorshift1024star (rs); | |
406 | rnumber_8 (x, r); | |
5f251c26 | 407 | } |
7d7b8bfe | 408 | iexport(random_r8); |
5f251c26 | 409 | |
cdc5524f TK |
410 | #ifdef HAVE_GFC_REAL_10 |
411 | ||
412 | /* This function produces a REAL(10) value from the uniform distribution | |
413 | with range [0,1). */ | |
414 | ||
415 | void | |
416 | random_r10 (GFC_REAL_10 *x) | |
417 | { | |
b152f5a2 JB |
418 | GFC_UINTEGER_8 r; |
419 | xorshift1024star_state* rs = get_rand_state(); | |
cdc5524f | 420 | |
b152f5a2 JB |
421 | if (unlikely (!rs->init)) |
422 | init_rand_state (rs, false); | |
423 | r = xorshift1024star (rs); | |
424 | rnumber_10 (x, r); | |
cdc5524f TK |
425 | } |
426 | iexport(random_r10); | |
427 | ||
428 | #endif | |
429 | ||
430 | /* This function produces a REAL(16) value from the uniform distribution | |
431 | with range [0,1). */ | |
432 | ||
433 | #ifdef HAVE_GFC_REAL_16 | |
434 | ||
435 | void | |
436 | random_r16 (GFC_REAL_16 *x) | |
437 | { | |
b152f5a2 JB |
438 | GFC_UINTEGER_8 r1, r2; |
439 | xorshift1024star_state* rs = get_rand_state(); | |
440 | ||
441 | if (unlikely (!rs->init)) | |
442 | init_rand_state (rs, false); | |
443 | r1 = xorshift1024star (rs); | |
444 | r2 = xorshift1024star (rs); | |
445 | rnumber_16 (x, r1, r2); | |
cdc5524f TK |
446 | } |
447 | iexport(random_r16); | |
448 | ||
449 | ||
450 | #endif | |
b152f5a2 | 451 | |
5f251c26 SK |
452 | /* This function fills a REAL(4) array with values from the uniform |
453 | distribution with range [0,1). */ | |
454 | ||
455 | void | |
7d7b8bfe | 456 | arandom_r4 (gfc_array_r4 *x) |
5f251c26 | 457 | { |
e33e218b TK |
458 | index_type count[GFC_MAX_DIMENSIONS]; |
459 | index_type extent[GFC_MAX_DIMENSIONS]; | |
460 | index_type stride[GFC_MAX_DIMENSIONS]; | |
5f251c26 SK |
461 | index_type stride0; |
462 | index_type dim; | |
463 | GFC_REAL_4 *dest; | |
b152f5a2 | 464 | xorshift1024star_state* rs = get_rand_state(); |
5f251c26 SK |
465 | int n; |
466 | ||
b152f5a2 | 467 | |
21d1335b | 468 | dest = x->base_addr; |
5f251c26 | 469 | |
5f251c26 SK |
470 | dim = GFC_DESCRIPTOR_RANK (x); |
471 | ||
472 | for (n = 0; n < dim; n++) | |
473 | { | |
474 | count[n] = 0; | |
dfb55fdc TK |
475 | stride[n] = GFC_DESCRIPTOR_STRIDE(x,n); |
476 | extent[n] = GFC_DESCRIPTOR_EXTENT(x,n); | |
5f251c26 SK |
477 | if (extent[n] <= 0) |
478 | return; | |
479 | } | |
480 | ||
481 | stride0 = stride[0]; | |
482 | ||
b152f5a2 JB |
483 | if (unlikely (!rs->init)) |
484 | init_rand_state (rs, false); | |
5e805e44 | 485 | |
5f251c26 SK |
486 | while (dest) |
487 | { | |
1b867ae7 | 488 | /* random_r4 (dest); */ |
b152f5a2 JB |
489 | uint64_t r = xorshift1024star (rs); |
490 | uint32_t high = (uint32_t) (r >> 32); | |
491 | rnumber_4 (dest, high); | |
5f251c26 SK |
492 | |
493 | /* Advance to the next element. */ | |
494 | dest += stride0; | |
495 | count[0]++; | |
496 | /* Advance to the next source element. */ | |
497 | n = 0; | |
498 | while (count[n] == extent[n]) | |
499 | { | |
500 | /* When we get to the end of a dimension, reset it and increment | |
501 | the next dimension. */ | |
502 | count[n] = 0; | |
503 | /* We could precalculate these products, but this is a less | |
504 | frequently used path so probably not worth it. */ | |
505 | dest -= stride[n] * extent[n]; | |
506 | n++; | |
507 | if (n == dim) | |
508 | { | |
509 | dest = NULL; | |
510 | break; | |
511 | } | |
512 | else | |
513 | { | |
514 | count[n]++; | |
515 | dest += stride[n]; | |
516 | } | |
517 | } | |
518 | } | |
519 | } | |
520 | ||
cdaa9fc4 | 521 | /* This function fills a REAL(8) array with values from the uniform |
5f251c26 SK |
522 | distribution with range [0,1). */ |
523 | ||
524 | void | |
7d7b8bfe | 525 | arandom_r8 (gfc_array_r8 *x) |
5f251c26 | 526 | { |
e33e218b TK |
527 | index_type count[GFC_MAX_DIMENSIONS]; |
528 | index_type extent[GFC_MAX_DIMENSIONS]; | |
529 | index_type stride[GFC_MAX_DIMENSIONS]; | |
5f251c26 SK |
530 | index_type stride0; |
531 | index_type dim; | |
532 | GFC_REAL_8 *dest; | |
b152f5a2 | 533 | xorshift1024star_state* rs = get_rand_state(); |
5f251c26 SK |
534 | int n; |
535 | ||
21d1335b | 536 | dest = x->base_addr; |
5f251c26 | 537 | |
5f251c26 SK |
538 | dim = GFC_DESCRIPTOR_RANK (x); |
539 | ||
540 | for (n = 0; n < dim; n++) | |
541 | { | |
542 | count[n] = 0; | |
dfb55fdc TK |
543 | stride[n] = GFC_DESCRIPTOR_STRIDE(x,n); |
544 | extent[n] = GFC_DESCRIPTOR_EXTENT(x,n); | |
5f251c26 SK |
545 | if (extent[n] <= 0) |
546 | return; | |
547 | } | |
548 | ||
549 | stride0 = stride[0]; | |
550 | ||
b152f5a2 JB |
551 | if (unlikely (!rs->init)) |
552 | init_rand_state (rs, false); | |
5e805e44 | 553 | |
5f251c26 SK |
554 | while (dest) |
555 | { | |
1b867ae7 | 556 | /* random_r8 (dest); */ |
b152f5a2 JB |
557 | uint64_t r = xorshift1024star (rs); |
558 | rnumber_8 (dest, r); | |
cdc5524f TK |
559 | |
560 | /* Advance to the next element. */ | |
561 | dest += stride0; | |
562 | count[0]++; | |
563 | /* Advance to the next source element. */ | |
564 | n = 0; | |
565 | while (count[n] == extent[n]) | |
566 | { | |
567 | /* When we get to the end of a dimension, reset it and increment | |
568 | the next dimension. */ | |
569 | count[n] = 0; | |
570 | /* We could precalculate these products, but this is a less | |
571 | frequently used path so probably not worth it. */ | |
572 | dest -= stride[n] * extent[n]; | |
573 | n++; | |
574 | if (n == dim) | |
575 | { | |
576 | dest = NULL; | |
577 | break; | |
578 | } | |
579 | else | |
580 | { | |
581 | count[n]++; | |
582 | dest += stride[n]; | |
583 | } | |
584 | } | |
585 | } | |
cdc5524f TK |
586 | } |
587 | ||
588 | #ifdef HAVE_GFC_REAL_10 | |
589 | ||
590 | /* This function fills a REAL(10) array with values from the uniform | |
591 | distribution with range [0,1). */ | |
592 | ||
593 | void | |
594 | arandom_r10 (gfc_array_r10 *x) | |
595 | { | |
596 | index_type count[GFC_MAX_DIMENSIONS]; | |
597 | index_type extent[GFC_MAX_DIMENSIONS]; | |
598 | index_type stride[GFC_MAX_DIMENSIONS]; | |
599 | index_type stride0; | |
600 | index_type dim; | |
601 | GFC_REAL_10 *dest; | |
b152f5a2 | 602 | xorshift1024star_state* rs = get_rand_state(); |
cdc5524f TK |
603 | int n; |
604 | ||
21d1335b | 605 | dest = x->base_addr; |
cdc5524f TK |
606 | |
607 | dim = GFC_DESCRIPTOR_RANK (x); | |
608 | ||
609 | for (n = 0; n < dim; n++) | |
610 | { | |
611 | count[n] = 0; | |
dfb55fdc TK |
612 | stride[n] = GFC_DESCRIPTOR_STRIDE(x,n); |
613 | extent[n] = GFC_DESCRIPTOR_EXTENT(x,n); | |
cdc5524f TK |
614 | if (extent[n] <= 0) |
615 | return; | |
616 | } | |
617 | ||
618 | stride0 = stride[0]; | |
619 | ||
b152f5a2 JB |
620 | if (unlikely (!rs->init)) |
621 | init_rand_state (rs, false); | |
cdc5524f TK |
622 | |
623 | while (dest) | |
624 | { | |
1b867ae7 | 625 | /* random_r10 (dest); */ |
b152f5a2 JB |
626 | uint64_t r = xorshift1024star (rs); |
627 | rnumber_10 (dest, r); | |
cdc5524f TK |
628 | |
629 | /* Advance to the next element. */ | |
630 | dest += stride0; | |
631 | count[0]++; | |
632 | /* Advance to the next source element. */ | |
633 | n = 0; | |
634 | while (count[n] == extent[n]) | |
635 | { | |
636 | /* When we get to the end of a dimension, reset it and increment | |
637 | the next dimension. */ | |
638 | count[n] = 0; | |
639 | /* We could precalculate these products, but this is a less | |
640 | frequently used path so probably not worth it. */ | |
641 | dest -= stride[n] * extent[n]; | |
642 | n++; | |
643 | if (n == dim) | |
644 | { | |
645 | dest = NULL; | |
646 | break; | |
647 | } | |
648 | else | |
649 | { | |
650 | count[n]++; | |
651 | dest += stride[n]; | |
652 | } | |
653 | } | |
654 | } | |
cdc5524f TK |
655 | } |
656 | ||
657 | #endif | |
658 | ||
659 | #ifdef HAVE_GFC_REAL_16 | |
660 | ||
661 | /* This function fills a REAL(16) array with values from the uniform | |
662 | distribution with range [0,1). */ | |
663 | ||
664 | void | |
665 | arandom_r16 (gfc_array_r16 *x) | |
666 | { | |
667 | index_type count[GFC_MAX_DIMENSIONS]; | |
668 | index_type extent[GFC_MAX_DIMENSIONS]; | |
669 | index_type stride[GFC_MAX_DIMENSIONS]; | |
670 | index_type stride0; | |
671 | index_type dim; | |
672 | GFC_REAL_16 *dest; | |
b152f5a2 | 673 | xorshift1024star_state* rs = get_rand_state(); |
cdc5524f TK |
674 | int n; |
675 | ||
21d1335b | 676 | dest = x->base_addr; |
cdc5524f TK |
677 | |
678 | dim = GFC_DESCRIPTOR_RANK (x); | |
679 | ||
680 | for (n = 0; n < dim; n++) | |
681 | { | |
682 | count[n] = 0; | |
dfb55fdc TK |
683 | stride[n] = GFC_DESCRIPTOR_STRIDE(x,n); |
684 | extent[n] = GFC_DESCRIPTOR_EXTENT(x,n); | |
cdc5524f TK |
685 | if (extent[n] <= 0) |
686 | return; | |
687 | } | |
688 | ||
689 | stride0 = stride[0]; | |
690 | ||
b152f5a2 JB |
691 | if (unlikely (!rs->init)) |
692 | init_rand_state (rs, false); | |
cdc5524f TK |
693 | |
694 | while (dest) | |
695 | { | |
1b867ae7 | 696 | /* random_r16 (dest); */ |
b152f5a2 JB |
697 | uint64_t r1 = xorshift1024star (rs); |
698 | uint64_t r2 = xorshift1024star (rs); | |
699 | rnumber_16 (dest, r1, r2); | |
5f251c26 SK |
700 | |
701 | /* Advance to the next element. */ | |
702 | dest += stride0; | |
703 | count[0]++; | |
704 | /* Advance to the next source element. */ | |
705 | n = 0; | |
706 | while (count[n] == extent[n]) | |
707 | { | |
708 | /* When we get to the end of a dimension, reset it and increment | |
709 | the next dimension. */ | |
710 | count[n] = 0; | |
711 | /* We could precalculate these products, but this is a less | |
712 | frequently used path so probably not worth it. */ | |
713 | dest -= stride[n] * extent[n]; | |
714 | n++; | |
715 | if (n == dim) | |
716 | { | |
717 | dest = NULL; | |
718 | break; | |
719 | } | |
720 | else | |
721 | { | |
722 | count[n]++; | |
723 | dest += stride[n]; | |
724 | } | |
725 | } | |
726 | } | |
727 | } | |
728 | ||
cdc5524f TK |
729 | #endif |
730 | ||
2d3ca8b7 | 731 | |
09309e09 JB |
732 | /* Number of elements in master_state array. */ |
733 | #define SZU64 (sizeof (master_state) / sizeof (uint64_t)) | |
2d3ca8b7 | 734 | |
2d3ca8b7 | 735 | |
09309e09 | 736 | /* Keys for scrambling the seed in order to avoid poor seeds. */ |
2d3ca8b7 | 737 | |
09309e09 JB |
738 | static const uint64_t xor_keys[] = { |
739 | 0xbd0c5b6e50c2df49ULL, 0xd46061cd46e1df38ULL, 0xbb4f4d4ed6103544ULL, | |
740 | 0x114a583d0756ad39ULL, 0x4b5ad8623d0aaab6ULL, 0x3f2ed7afbe0c0f21ULL, | |
741 | 0xdec83fd65f113445ULL, 0x3824f8fbc4f10d24ULL, 0x5d9025af05878911ULL, | |
742 | 0x500bc46b540340e9ULL, 0x8bd53298e0d00530ULL, 0x57886e40a952e06aULL, | |
743 | 0x926e76c88e31cdb6ULL, 0xbd0724dac0a3a5f9ULL, 0xc5c8981b858ab796ULL, | |
744 | 0xbb12ab2694c2b32cULL | |
745 | }; | |
746 | ||
747 | ||
748 | /* Since a XOR cipher is symmetric, we need only one routine, and we | |
749 | can use it both for encryption and decryption. */ | |
2d3ca8b7 FXC |
750 | |
751 | static void | |
09309e09 | 752 | scramble_seed (uint64_t *dest, const uint64_t *src) |
2d3ca8b7 | 753 | { |
09309e09 JB |
754 | for (int i = 0; i < (int) SZU64; i++) |
755 | dest[i] = src[i] ^ xor_keys[i]; | |
2d3ca8b7 FXC |
756 | } |
757 | ||
758 | ||
7d7b8bfe | 759 | /* random_seed is used to seed the PRNG with either a default |
420aa7b8 | 760 | set of seeds or user specified set of seeds. random_seed |
5f251c26 SK |
761 | must be called with no argument or exactly one argument. */ |
762 | ||
763 | void | |
34b4bc5c | 764 | random_seed_i4 (GFC_INTEGER_4 *size, gfc_array_i4 *put, gfc_array_i4 *get) |
5f251c26 | 765 | { |
09309e09 | 766 | uint64_t seed[SZU64]; |
b152f5a2 | 767 | #define SZ (sizeof (master_state) / sizeof (GFC_INTEGER_4)) |
5e805e44 | 768 | |
34b4bc5c FXC |
769 | /* Check that we only have one argument present. */ |
770 | if ((size ? 1 : 0) + (put ? 1 : 0) + (get ? 1 : 0) > 1) | |
771 | runtime_error ("RANDOM_SEED should have at most one argument present."); | |
cdc5524f | 772 | |
b152f5a2 JB |
773 | if (size != NULL) |
774 | *size = SZ + 1; | |
775 | ||
776 | xorshift1024star_state* rs = get_rand_state(); | |
777 | ||
778 | /* Return the seed to GET data. */ | |
779 | if (get != NULL) | |
780 | { | |
781 | /* If the rank of the array is not 1, abort. */ | |
782 | if (GFC_DESCRIPTOR_RANK (get) != 1) | |
783 | runtime_error ("Array rank of GET is not 1."); | |
784 | ||
785 | /* If the array is too small, abort. */ | |
786 | if (GFC_DESCRIPTOR_EXTENT(get,0) < (index_type) SZ + 1) | |
787 | runtime_error ("Array size of GET is too small."); | |
788 | ||
789 | if (!rs->init) | |
790 | init_rand_state (rs, false); | |
791 | ||
792 | /* Unscramble the seed. */ | |
09309e09 | 793 | scramble_seed (seed, rs->s); |
b152f5a2 JB |
794 | |
795 | /* Then copy it back to the user variable. */ | |
796 | for (size_t i = 0; i < SZ ; i++) | |
797 | memcpy (&(get->base_addr[(SZ - 1 - i) * GFC_DESCRIPTOR_STRIDE(get,0)]), | |
09309e09 | 798 | (unsigned char*) seed + i * sizeof(GFC_UINTEGER_4), |
b152f5a2 JB |
799 | sizeof(GFC_UINTEGER_4)); |
800 | ||
801 | /* Finally copy the value of p after the seed. */ | |
802 | get->base_addr[SZ * GFC_DESCRIPTOR_STRIDE(get, 0)] = rs->p; | |
803 | } | |
804 | ||
805 | else | |
806 | { | |
807 | __gthread_mutex_lock (&random_lock); | |
808 | ||
34b4bc5c FXC |
809 | /* From the standard: "If no argument is present, the processor assigns |
810 | a processor-dependent value to the seed." */ | |
811 | if (size == NULL && put == NULL && get == NULL) | |
b152f5a2 | 812 | { |
91151a73 | 813 | master_init = false; |
b152f5a2 JB |
814 | init_rand_state (rs, true); |
815 | } | |
5f251c26 SK |
816 | |
817 | if (put != NULL) | |
818 | { | |
cdaa9fc4 | 819 | /* If the rank of the array is not 1, abort. */ |
5f251c26 SK |
820 | if (GFC_DESCRIPTOR_RANK (put) != 1) |
821 | runtime_error ("Array rank of PUT is not 1."); | |
822 | ||
cdaa9fc4 | 823 | /* If the array is too small, abort. */ |
b152f5a2 | 824 | if (GFC_DESCRIPTOR_EXTENT(put,0) < (index_type) SZ + 1) |
5f251c26 SK |
825 | runtime_error ("Array size of PUT is too small."); |
826 | ||
2d3ca8b7 | 827 | /* We copy the seed given by the user. */ |
b152f5a2 | 828 | for (size_t i = 0; i < SZ; i++) |
09309e09 | 829 | memcpy ((unsigned char*) seed + i * sizeof(GFC_UINTEGER_4), |
b152f5a2 | 830 | &(put->base_addr[(SZ - 1 - i) * GFC_DESCRIPTOR_STRIDE(put,0)]), |
2d3ca8b7 FXC |
831 | sizeof(GFC_UINTEGER_4)); |
832 | ||
833 | /* We put it after scrambling the bytes, to paper around users who | |
834 | provide seeds with quality only in the lower or upper part. */ | |
09309e09 | 835 | scramble_seed (master_state, seed); |
b152f5a2 | 836 | njumps = 0; |
91151a73 | 837 | master_init = true; |
b152f5a2 | 838 | init_rand_state (rs, true); |
5f251c26 | 839 | |
b152f5a2 JB |
840 | rs->p = put->base_addr[SZ * GFC_DESCRIPTOR_STRIDE(put, 0)] & 15; |
841 | } | |
5f251c26 | 842 | |
5e805e44 | 843 | __gthread_mutex_unlock (&random_lock); |
b152f5a2 JB |
844 | } |
845 | #undef SZ | |
5f251c26 | 846 | } |
34b4bc5c FXC |
847 | iexport(random_seed_i4); |
848 | ||
849 | ||
850 | void | |
851 | random_seed_i8 (GFC_INTEGER_8 *size, gfc_array_i8 *put, gfc_array_i8 *get) | |
852 | { | |
09309e09 JB |
853 | uint64_t seed[SZU64]; |
854 | ||
34b4bc5c FXC |
855 | /* Check that we only have one argument present. */ |
856 | if ((size ? 1 : 0) + (put ? 1 : 0) + (get ? 1 : 0) > 1) | |
857 | runtime_error ("RANDOM_SEED should have at most one argument present."); | |
858 | ||
b152f5a2 | 859 | #define SZ (sizeof (master_state) / sizeof (GFC_INTEGER_8)) |
34b4bc5c | 860 | if (size != NULL) |
b152f5a2 | 861 | *size = SZ + 1; |
34b4bc5c | 862 | |
b152f5a2 JB |
863 | xorshift1024star_state* rs = get_rand_state(); |
864 | ||
865 | /* Return the seed to GET data. */ | |
866 | if (get != NULL) | |
34b4bc5c FXC |
867 | { |
868 | /* If the rank of the array is not 1, abort. */ | |
b152f5a2 JB |
869 | if (GFC_DESCRIPTOR_RANK (get) != 1) |
870 | runtime_error ("Array rank of GET is not 1."); | |
34b4bc5c FXC |
871 | |
872 | /* If the array is too small, abort. */ | |
b152f5a2 JB |
873 | if (GFC_DESCRIPTOR_EXTENT(get,0) < (index_type) SZ + 1) |
874 | runtime_error ("Array size of GET is too small."); | |
875 | ||
876 | if (!rs->init) | |
877 | init_rand_state (rs, false); | |
34b4bc5c | 878 | |
09309e09 JB |
879 | /* Unscramble the seed. */ |
880 | scramble_seed (seed, rs->s); | |
881 | ||
34b4bc5c | 882 | /* This code now should do correct strides. */ |
b152f5a2 | 883 | for (size_t i = 0; i < SZ; i++) |
09309e09 | 884 | memcpy (&(get->base_addr[i * GFC_DESCRIPTOR_STRIDE(get,0)]), &seed[i], |
34b4bc5c | 885 | sizeof (GFC_UINTEGER_8)); |
b152f5a2 JB |
886 | |
887 | get->base_addr[SZ * GFC_DESCRIPTOR_STRIDE(get, 0)] = rs->p; | |
34b4bc5c FXC |
888 | } |
889 | ||
b152f5a2 JB |
890 | else |
891 | { | |
892 | __gthread_mutex_lock (&random_lock); | |
893 | ||
894 | /* From the standard: "If no argument is present, the processor assigns | |
895 | a processor-dependent value to the seed." */ | |
896 | if (size == NULL && put == NULL && get == NULL) | |
897 | { | |
91151a73 | 898 | master_init = false; |
b152f5a2 JB |
899 | init_rand_state (rs, true); |
900 | } | |
901 | ||
902 | if (put != NULL) | |
34b4bc5c FXC |
903 | { |
904 | /* If the rank of the array is not 1, abort. */ | |
b152f5a2 JB |
905 | if (GFC_DESCRIPTOR_RANK (put) != 1) |
906 | runtime_error ("Array rank of PUT is not 1."); | |
34b4bc5c FXC |
907 | |
908 | /* If the array is too small, abort. */ | |
b152f5a2 JB |
909 | if (GFC_DESCRIPTOR_EXTENT(put,0) < (index_type) SZ + 1) |
910 | runtime_error ("Array size of PUT is too small."); | |
34b4bc5c FXC |
911 | |
912 | /* This code now should do correct strides. */ | |
b152f5a2 | 913 | for (size_t i = 0; i < SZ; i++) |
09309e09 | 914 | memcpy (&seed[i], &(put->base_addr[i * GFC_DESCRIPTOR_STRIDE(put,0)]), |
34b4bc5c | 915 | sizeof (GFC_UINTEGER_8)); |
b152f5a2 | 916 | |
09309e09 | 917 | scramble_seed (master_state, seed); |
b152f5a2 | 918 | njumps = 0; |
91151a73 | 919 | master_init = true; |
b152f5a2 JB |
920 | init_rand_state (rs, true); |
921 | rs->p = put->base_addr[SZ * GFC_DESCRIPTOR_STRIDE(put, 0)] & 15; | |
922 | } | |
923 | ||
34b4bc5c FXC |
924 | |
925 | __gthread_mutex_unlock (&random_lock); | |
b152f5a2 | 926 | } |
34b4bc5c FXC |
927 | } |
928 | iexport(random_seed_i8); | |
7d7b8bfe | 929 | |
5e805e44 | 930 | |
b152f5a2 | 931 | #if !defined __GTHREAD_MUTEX_INIT || defined __GTHREADS |
5e805e44 | 932 | static void __attribute__((constructor)) |
b152f5a2 | 933 | constructor_random (void) |
5e805e44 | 934 | { |
b152f5a2 | 935 | #ifndef __GTHREAD_MUTEX_INIT |
5e805e44 | 936 | __GTHREAD_MUTEX_INIT_FUNCTION (&random_lock); |
b152f5a2 JB |
937 | #endif |
938 | if (__gthread_active_p ()) | |
939 | __gthread_key_create (&rand_state_key, &free); | |
940 | } | |
941 | #endif | |
942 | ||
943 | #ifdef __GTHREADS | |
944 | static void __attribute__((destructor)) | |
945 | destructor_random (void) | |
946 | { | |
947 | if (__gthread_active_p ()) | |
948 | __gthread_key_delete (rand_state_key); | |
5e805e44 JJ |
949 | } |
950 | #endif |