]>
Commit | Line | Data |
---|---|---|
6de9cd9a | 1 | /* Implementation of the RANDOM intrinsics |
8d9254fc | 2 | Copyright (C) 2002-2020 Free Software Foundation, Inc. |
b152f5a2 JB |
3 | Contributed by Lars Segerlund <seger@linuxmail.org>, |
4 | Steve Kargl and Janne Blomqvist. | |
6de9cd9a | 5 | |
21d1335b | 6 | This file is part of the GNU Fortran runtime library (libgfortran). |
6de9cd9a DN |
7 | |
8 | Libgfortran is free software; you can redistribute it and/or | |
57dea9f6 | 9 | modify it under the terms of the GNU General Public |
6de9cd9a | 10 | License as published by the Free Software Foundation; either |
748086b7 | 11 | version 3 of the License, or (at your option) any later version. |
6de9cd9a DN |
12 | |
13 | Ligbfortran is distributed in the hope that it will be useful, | |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
57dea9f6 | 16 | GNU General Public License for more details. |
6de9cd9a | 17 | |
748086b7 JJ |
18 | Under Section 7 of GPL version 3, you are granted additional |
19 | permissions described in the GCC Runtime Library Exception, version | |
20 | 3.1, as published by the Free Software Foundation. | |
21 | ||
22 | You should have received a copy of the GNU General Public License and | |
23 | a copy of the GCC Runtime Library Exception along with this program; | |
24 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
25 | <http://www.gnu.org/licenses/>. */ | |
6de9cd9a | 26 | |
9ad5c32a JB |
27 | /* For rand_s. */ |
28 | #define _CRT_RAND_S | |
29 | ||
7d7b8bfe | 30 | #include "libgfortran.h" |
7606c786 | 31 | #include <gthr.h> |
34b4bc5c | 32 | #include <string.h> |
7d7b8bfe | 33 | |
b152f5a2 JB |
34 | #ifdef HAVE_UNISTD_H |
35 | #include <unistd.h> | |
36 | #endif | |
37 | #include <sys/stat.h> | |
38 | #include <fcntl.h> | |
39 | #include "time_1.h" | |
864c28e1 RO |
40 | #ifdef HAVE_SYS_RANDOM_H |
41 | #include <sys/random.h> | |
42 | #endif | |
b152f5a2 JB |
43 | |
44 | #ifdef __MINGW32__ | |
45 | #define HAVE_GETPID 1 | |
46 | #include <process.h> | |
9ad5c32a | 47 | #include <_mingw.h> /* For __MINGW64_VERSION_MAJOR */ |
b152f5a2 JB |
48 | #endif |
49 | ||
7d7b8bfe RH |
50 | extern void random_r4 (GFC_REAL_4 *); |
51 | iexport_proto(random_r4); | |
52 | ||
53 | extern void random_r8 (GFC_REAL_8 *); | |
54 | iexport_proto(random_r8); | |
55 | ||
56 | extern void arandom_r4 (gfc_array_r4 *); | |
57 | export_proto(arandom_r4); | |
58 | ||
59 | extern void arandom_r8 (gfc_array_r8 *); | |
60 | export_proto(arandom_r8); | |
61 | ||
cdc5524f TK |
62 | #ifdef HAVE_GFC_REAL_10 |
63 | ||
64 | extern void random_r10 (GFC_REAL_10 *); | |
65 | iexport_proto(random_r10); | |
66 | ||
67 | extern void arandom_r10 (gfc_array_r10 *); | |
68 | export_proto(arandom_r10); | |
69 | ||
70 | #endif | |
71 | ||
72 | #ifdef HAVE_GFC_REAL_16 | |
73 | ||
74 | extern void random_r16 (GFC_REAL_16 *); | |
75 | iexport_proto(random_r16); | |
76 | ||
77 | extern void arandom_r16 (gfc_array_r16 *); | |
78 | export_proto(arandom_r16); | |
79 | ||
80 | #endif | |
81 | ||
5e805e44 JJ |
82 | #ifdef __GTHREAD_MUTEX_INIT |
83 | static __gthread_mutex_t random_lock = __GTHREAD_MUTEX_INIT; | |
84 | #else | |
85 | static __gthread_mutex_t random_lock; | |
86 | #endif | |
87 | ||
cdc5524f TK |
88 | /* Helper routines to map a GFC_UINTEGER_* to the corresponding |
89 | GFC_REAL_* types in the range of [0,1). If GFC_REAL_*_RADIX are 2 | |
90 | or 16, respectively, we mask off the bits that don't fit into the | |
91 | correct GFC_REAL_*, convert to the real type, then multiply by the | |
1b867ae7 | 92 | correct offset. */ |
cdc5524f TK |
93 | |
94 | ||
992b0aa1 | 95 | static void |
cdc5524f TK |
96 | rnumber_4 (GFC_REAL_4 *f, GFC_UINTEGER_4 v) |
97 | { | |
98 | GFC_UINTEGER_4 mask; | |
99 | #if GFC_REAL_4_RADIX == 2 | |
100 | mask = ~ (GFC_UINTEGER_4) 0u << (32 - GFC_REAL_4_DIGITS); | |
101 | #elif GFC_REAL_4_RADIX == 16 | |
102 | mask = ~ (GFC_UINTEGER_4) 0u << ((8 - GFC_REAL_4_DIGITS) * 4); | |
103 | #else | |
104 | #error "GFC_REAL_4_RADIX has unknown value" | |
105 | #endif | |
106 | v = v & mask; | |
a83169cd | 107 | *f = (GFC_REAL_4) v * GFC_REAL_4_LITERAL(0x1.p-32); |
cdc5524f TK |
108 | } |
109 | ||
992b0aa1 | 110 | static void |
cdc5524f TK |
111 | rnumber_8 (GFC_REAL_8 *f, GFC_UINTEGER_8 v) |
112 | { | |
113 | GFC_UINTEGER_8 mask; | |
114 | #if GFC_REAL_8_RADIX == 2 | |
115 | mask = ~ (GFC_UINTEGER_8) 0u << (64 - GFC_REAL_8_DIGITS); | |
116 | #elif GFC_REAL_8_RADIX == 16 | |
117 | mask = ~ (GFC_UINTEGER_8) 0u << (16 - GFC_REAL_8_DIGITS) * 4); | |
118 | #else | |
119 | #error "GFC_REAL_8_RADIX has unknown value" | |
120 | #endif | |
121 | v = v & mask; | |
a83169cd | 122 | *f = (GFC_REAL_8) v * GFC_REAL_8_LITERAL(0x1.p-64); |
cdc5524f TK |
123 | } |
124 | ||
125 | #ifdef HAVE_GFC_REAL_10 | |
5f251c26 | 126 | |
992b0aa1 | 127 | static void |
cdc5524f TK |
128 | rnumber_10 (GFC_REAL_10 *f, GFC_UINTEGER_8 v) |
129 | { | |
130 | GFC_UINTEGER_8 mask; | |
131 | #if GFC_REAL_10_RADIX == 2 | |
132 | mask = ~ (GFC_UINTEGER_8) 0u << (64 - GFC_REAL_10_DIGITS); | |
133 | #elif GFC_REAL_10_RADIX == 16 | |
134 | mask = ~ (GFC_UINTEGER_10) 0u << ((16 - GFC_REAL_10_DIGITS) * 4); | |
135 | #else | |
136 | #error "GFC_REAL_10_RADIX has unknown value" | |
137 | #endif | |
138 | v = v & mask; | |
a83169cd | 139 | *f = (GFC_REAL_10) v * GFC_REAL_10_LITERAL(0x1.p-64); |
cdc5524f TK |
140 | } |
141 | #endif | |
142 | ||
143 | #ifdef HAVE_GFC_REAL_16 | |
144 | ||
145 | /* For REAL(KIND=16), we only need to mask off the lower bits. */ | |
146 | ||
992b0aa1 | 147 | static void |
cdc5524f TK |
148 | rnumber_16 (GFC_REAL_16 *f, GFC_UINTEGER_8 v1, GFC_UINTEGER_8 v2) |
149 | { | |
150 | GFC_UINTEGER_8 mask; | |
151 | #if GFC_REAL_16_RADIX == 2 | |
152 | mask = ~ (GFC_UINTEGER_8) 0u << (128 - GFC_REAL_16_DIGITS); | |
153 | #elif GFC_REAL_16_RADIX == 16 | |
154 | mask = ~ (GFC_UINTEGER_8) 0u << ((32 - GFC_REAL_16_DIGITS) * 4); | |
155 | #else | |
156 | #error "GFC_REAL_16_RADIX has unknown value" | |
157 | #endif | |
158 | v2 = v2 & mask; | |
a83169cd FXC |
159 | *f = (GFC_REAL_16) v1 * GFC_REAL_16_LITERAL(0x1.p-64) |
160 | + (GFC_REAL_16) v2 * GFC_REAL_16_LITERAL(0x1.p-128); | |
cdc5524f TK |
161 | } |
162 | #endif | |
cdc5524f | 163 | |
cdc5524f | 164 | |
b152f5a2 JB |
165 | /* |
166 | ||
0e99e093 | 167 | We use the xoshiro256** generator, a fast high-quality generator |
b152f5a2 JB |
168 | that: |
169 | ||
170 | - passes TestU1 without any failures | |
171 | ||
172 | - provides a "jump" function making it easy to provide many | |
173 | independent parallel streams. | |
174 | ||
0e99e093 | 175 | - Long period of 2**256 - 1 |
b152f5a2 JB |
176 | |
177 | A description can be found at | |
178 | ||
0e99e093 | 179 | http://prng.di.unimi.it/ |
b152f5a2 JB |
180 | |
181 | or | |
182 | ||
0e99e093 | 183 | https://arxiv.org/abs/1805.01407 |
b152f5a2 JB |
184 | |
185 | The paper includes public domain source code which is the basis for | |
186 | the implementation below. | |
187 | ||
188 | */ | |
189 | typedef struct | |
190 | { | |
191 | bool init; | |
0e99e093 | 192 | uint64_t s[4]; |
b152f5a2 | 193 | } |
0e99e093 | 194 | prng_state; |
b152f5a2 JB |
195 | |
196 | ||
0eaee4ab JB |
197 | /* master_state is the only variable protected by random_lock. */ |
198 | static prng_state master_state = { .init = false, .s = { | |
199 | 0xad63fa1ed3b55f36ULL, 0xd94473e78978b497ULL, 0xbc60592a98172477ULL, | |
200 | 0xa3de7c6e81265301ULL } | |
cdc5524f TK |
201 | }; |
202 | ||
cdc5524f | 203 | |
b152f5a2 | 204 | static __gthread_key_t rand_state_key; |
cdc5524f | 205 | |
0e99e093 | 206 | static prng_state* |
b152f5a2 JB |
207 | get_rand_state (void) |
208 | { | |
209 | /* For single threaded apps. */ | |
0e99e093 | 210 | static prng_state rand_state; |
b152f5a2 JB |
211 | |
212 | if (__gthread_active_p ()) | |
213 | { | |
214 | void* p = __gthread_getspecific (rand_state_key); | |
215 | if (!p) | |
216 | { | |
0e99e093 | 217 | p = xcalloc (1, sizeof (prng_state)); |
b152f5a2 JB |
218 | __gthread_setspecific (rand_state_key, p); |
219 | } | |
220 | return p; | |
221 | } | |
222 | else | |
223 | return &rand_state; | |
224 | } | |
5f251c26 | 225 | |
0e99e093 JB |
226 | static inline uint64_t |
227 | rotl (const uint64_t x, int k) | |
228 | { | |
229 | return (x << k) | (x >> (64 - k)); | |
230 | } | |
231 | ||
5f251c26 | 232 | |
b152f5a2 | 233 | static uint64_t |
0e99e093 | 234 | prng_next (prng_state* rs) |
5f251c26 | 235 | { |
0e99e093 JB |
236 | const uint64_t result = rotl(rs->s[1] * 5, 7) * 9; |
237 | ||
238 | const uint64_t t = rs->s[1] << 17; | |
239 | ||
240 | rs->s[2] ^= rs->s[0]; | |
241 | rs->s[3] ^= rs->s[1]; | |
242 | rs->s[1] ^= rs->s[2]; | |
243 | rs->s[0] ^= rs->s[3]; | |
244 | ||
245 | rs->s[2] ^= t; | |
246 | ||
247 | rs->s[3] = rotl(rs->s[3], 45); | |
248 | ||
249 | return result; | |
b152f5a2 | 250 | } |
5f251c26 | 251 | |
5f251c26 | 252 | |
b152f5a2 | 253 | /* This is the jump function for the generator. It is equivalent to |
0e99e093 | 254 | 2^128 calls to prng_next(); it can be used to generate 2^128 |
b152f5a2 JB |
255 | non-overlapping subsequences for parallel computations. */ |
256 | ||
257 | static void | |
0e99e093 | 258 | jump (prng_state* rs) |
b152f5a2 | 259 | { |
0e99e093 JB |
260 | static const uint64_t JUMP[] = { 0x180ec6d33cfd0aba, 0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c }; |
261 | ||
262 | uint64_t s0 = 0; | |
263 | uint64_t s1 = 0; | |
264 | uint64_t s2 = 0; | |
265 | uint64_t s3 = 0; | |
7a157266 | 266 | for(size_t i = 0; i < sizeof JUMP / sizeof *JUMP; i++) |
0e99e093 JB |
267 | for(int b = 0; b < 64; b++) { |
268 | if (JUMP[i] & UINT64_C(1) << b) { | |
269 | s0 ^= rs->s[0]; | |
270 | s1 ^= rs->s[1]; | |
271 | s2 ^= rs->s[2]; | |
272 | s3 ^= rs->s[3]; | |
b152f5a2 | 273 | } |
0e99e093 JB |
274 | prng_next (rs); |
275 | } | |
5f251c26 | 276 | |
0e99e093 JB |
277 | rs->s[0] = s0; |
278 | rs->s[1] = s1; | |
279 | rs->s[2] = s2; | |
280 | rs->s[3] = s3; | |
281 | } | |
b152f5a2 | 282 | |
b152f5a2 | 283 | |
0e99e093 JB |
284 | /* Splitmix64 recommended by xoshiro author for initializing. After |
285 | getting one uint64_t value from the OS, this is used to fill in the | |
286 | rest of the xoshiro state. */ | |
b152f5a2 | 287 | |
0e99e093 JB |
288 | static uint64_t |
289 | splitmix64 (uint64_t x) | |
b152f5a2 | 290 | { |
0e99e093 JB |
291 | uint64_t z = (x += 0x9e3779b97f4a7c15); |
292 | z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; | |
293 | z = (z ^ (z >> 27)) * 0x94d049bb133111eb; | |
294 | return z ^ (z >> 31); | |
b152f5a2 JB |
295 | } |
296 | ||
b152f5a2 | 297 | |
0e99e093 | 298 | /* Get some bytes from the operating system in order to seed |
b152f5a2 JB |
299 | the PRNG. */ |
300 | ||
301 | static int | |
302 | getosrandom (void *buf, size_t buflen) | |
303 | { | |
9ad5c32a | 304 | /* rand_s is available in MinGW-w64 but not plain MinGW. */ |
add5b95a | 305 | #if defined(__MINGW64_VERSION_MAJOR) |
9ad5c32a | 306 | unsigned int* b = buf; |
7a157266 | 307 | for (size_t i = 0; i < buflen / sizeof (unsigned int); i++) |
9ad5c32a JB |
308 | rand_s (&b[i]); |
309 | return buflen; | |
310 | #else | |
d66d6d15 JB |
311 | #ifdef HAVE_GETENTROPY |
312 | if (getentropy (buf, buflen) == 0) | |
0e99e093 | 313 | return buflen; |
d66d6d15 | 314 | #endif |
b152f5a2 JB |
315 | int flags = O_RDONLY; |
316 | #ifdef O_CLOEXEC | |
317 | flags |= O_CLOEXEC; | |
318 | #endif | |
319 | int fd = open("/dev/urandom", flags); | |
320 | if (fd != -1) | |
321 | { | |
322 | int res = read(fd, buf, buflen); | |
323 | close (fd); | |
324 | return res; | |
325 | } | |
0e99e093 | 326 | uint64_t seed = 0x047f7684e9fc949dULL; |
b152f5a2 JB |
327 | time_t secs; |
328 | long usecs; | |
329 | if (gf_gettime (&secs, &usecs) == 0) | |
330 | { | |
331 | seed ^= secs; | |
332 | seed ^= usecs; | |
333 | } | |
334 | #ifdef HAVE_GETPID | |
335 | pid_t pid = getpid(); | |
336 | seed ^= pid; | |
337 | #endif | |
0e99e093 JB |
338 | size_t size = buflen < sizeof (uint64_t) ? buflen : sizeof (uint64_t); |
339 | memcpy (buf, &seed, size); | |
340 | return size; | |
9ad5c32a | 341 | #endif /* __MINGW64_VERSION_MAJOR */ |
b152f5a2 JB |
342 | } |
343 | ||
344 | ||
91151a73 JB |
345 | /* Initialize the random number generator for the current thread, |
346 | using the master state and the number of times we must jump. */ | |
347 | ||
348 | static void | |
0e99e093 | 349 | init_rand_state (prng_state* rs, const bool locked) |
91151a73 JB |
350 | { |
351 | if (!locked) | |
352 | __gthread_mutex_lock (&random_lock); | |
0eaee4ab | 353 | if (!master_state.init) |
91151a73 | 354 | { |
0e99e093 JB |
355 | uint64_t os_seed; |
356 | getosrandom (&os_seed, sizeof (os_seed)); | |
0eaee4ab | 357 | for (uint64_t i = 0; i < sizeof (master_state.s) / sizeof (uint64_t); i++) |
f77d2759 JB |
358 | { |
359 | os_seed = splitmix64 (os_seed); | |
0eaee4ab | 360 | master_state.s[i] = os_seed; |
f77d2759 | 361 | } |
0eaee4ab | 362 | master_state.init = true; |
91151a73 | 363 | } |
0eaee4ab JB |
364 | memcpy (&rs->s, master_state.s, sizeof (master_state.s)); |
365 | jump (&master_state); | |
91151a73 JB |
366 | if (!locked) |
367 | __gthread_mutex_unlock (&random_lock); | |
91151a73 JB |
368 | rs->init = true; |
369 | } | |
370 | ||
371 | ||
cdaa9fc4 | 372 | /* This function produces a REAL(4) value from the uniform distribution |
5f251c26 SK |
373 | with range [0,1). */ |
374 | ||
375 | void | |
7d7b8bfe | 376 | random_r4 (GFC_REAL_4 *x) |
5f251c26 | 377 | { |
0e99e093 | 378 | prng_state* rs = get_rand_state(); |
b152f5a2 JB |
379 | |
380 | if (unlikely (!rs->init)) | |
381 | init_rand_state (rs, false); | |
0e99e093 | 382 | uint64_t r = prng_next (rs); |
b152f5a2 JB |
383 | /* Take the higher bits, ensuring that a stream of real(4), real(8), |
384 | and real(10) will be identical (except for precision). */ | |
385 | uint32_t high = (uint32_t) (r >> 32); | |
386 | rnumber_4 (x, high); | |
5f251c26 | 387 | } |
7d7b8bfe | 388 | iexport(random_r4); |
5f251c26 SK |
389 | |
390 | /* This function produces a REAL(8) value from the uniform distribution | |
391 | with range [0,1). */ | |
392 | ||
393 | void | |
7d7b8bfe | 394 | random_r8 (GFC_REAL_8 *x) |
5f251c26 | 395 | { |
b152f5a2 | 396 | GFC_UINTEGER_8 r; |
0e99e093 | 397 | prng_state* rs = get_rand_state(); |
5f251c26 | 398 | |
b152f5a2 JB |
399 | if (unlikely (!rs->init)) |
400 | init_rand_state (rs, false); | |
0e99e093 | 401 | r = prng_next (rs); |
b152f5a2 | 402 | rnumber_8 (x, r); |
5f251c26 | 403 | } |
7d7b8bfe | 404 | iexport(random_r8); |
5f251c26 | 405 | |
cdc5524f TK |
406 | #ifdef HAVE_GFC_REAL_10 |
407 | ||
408 | /* This function produces a REAL(10) value from the uniform distribution | |
409 | with range [0,1). */ | |
410 | ||
411 | void | |
412 | random_r10 (GFC_REAL_10 *x) | |
413 | { | |
b152f5a2 | 414 | GFC_UINTEGER_8 r; |
0e99e093 | 415 | prng_state* rs = get_rand_state(); |
cdc5524f | 416 | |
b152f5a2 JB |
417 | if (unlikely (!rs->init)) |
418 | init_rand_state (rs, false); | |
0e99e093 | 419 | r = prng_next (rs); |
b152f5a2 | 420 | rnumber_10 (x, r); |
cdc5524f TK |
421 | } |
422 | iexport(random_r10); | |
423 | ||
424 | #endif | |
425 | ||
426 | /* This function produces a REAL(16) value from the uniform distribution | |
427 | with range [0,1). */ | |
428 | ||
429 | #ifdef HAVE_GFC_REAL_16 | |
430 | ||
431 | void | |
432 | random_r16 (GFC_REAL_16 *x) | |
433 | { | |
b152f5a2 | 434 | GFC_UINTEGER_8 r1, r2; |
0e99e093 | 435 | prng_state* rs = get_rand_state(); |
b152f5a2 JB |
436 | |
437 | if (unlikely (!rs->init)) | |
438 | init_rand_state (rs, false); | |
0e99e093 JB |
439 | r1 = prng_next (rs); |
440 | r2 = prng_next (rs); | |
b152f5a2 | 441 | rnumber_16 (x, r1, r2); |
cdc5524f TK |
442 | } |
443 | iexport(random_r16); | |
444 | ||
445 | ||
446 | #endif | |
b152f5a2 | 447 | |
5f251c26 SK |
448 | /* This function fills a REAL(4) array with values from the uniform |
449 | distribution with range [0,1). */ | |
450 | ||
451 | void | |
7d7b8bfe | 452 | arandom_r4 (gfc_array_r4 *x) |
5f251c26 | 453 | { |
e33e218b TK |
454 | index_type count[GFC_MAX_DIMENSIONS]; |
455 | index_type extent[GFC_MAX_DIMENSIONS]; | |
456 | index_type stride[GFC_MAX_DIMENSIONS]; | |
5f251c26 SK |
457 | index_type stride0; |
458 | index_type dim; | |
459 | GFC_REAL_4 *dest; | |
0e99e093 | 460 | prng_state* rs = get_rand_state(); |
b152f5a2 | 461 | |
21d1335b | 462 | dest = x->base_addr; |
5f251c26 | 463 | |
5f251c26 SK |
464 | dim = GFC_DESCRIPTOR_RANK (x); |
465 | ||
7a157266 | 466 | for (index_type n = 0; n < dim; n++) |
5f251c26 SK |
467 | { |
468 | count[n] = 0; | |
dfb55fdc TK |
469 | stride[n] = GFC_DESCRIPTOR_STRIDE(x,n); |
470 | extent[n] = GFC_DESCRIPTOR_EXTENT(x,n); | |
5f251c26 SK |
471 | if (extent[n] <= 0) |
472 | return; | |
473 | } | |
474 | ||
475 | stride0 = stride[0]; | |
476 | ||
b152f5a2 JB |
477 | if (unlikely (!rs->init)) |
478 | init_rand_state (rs, false); | |
5e805e44 | 479 | |
5f251c26 SK |
480 | while (dest) |
481 | { | |
1b867ae7 | 482 | /* random_r4 (dest); */ |
0e99e093 | 483 | uint64_t r = prng_next (rs); |
b152f5a2 JB |
484 | uint32_t high = (uint32_t) (r >> 32); |
485 | rnumber_4 (dest, high); | |
5f251c26 SK |
486 | |
487 | /* Advance to the next element. */ | |
488 | dest += stride0; | |
489 | count[0]++; | |
490 | /* Advance to the next source element. */ | |
7a157266 | 491 | index_type n = 0; |
5f251c26 SK |
492 | while (count[n] == extent[n]) |
493 | { | |
494 | /* When we get to the end of a dimension, reset it and increment | |
495 | the next dimension. */ | |
496 | count[n] = 0; | |
497 | /* We could precalculate these products, but this is a less | |
498 | frequently used path so probably not worth it. */ | |
499 | dest -= stride[n] * extent[n]; | |
500 | n++; | |
501 | if (n == dim) | |
502 | { | |
503 | dest = NULL; | |
504 | break; | |
505 | } | |
506 | else | |
507 | { | |
508 | count[n]++; | |
509 | dest += stride[n]; | |
510 | } | |
511 | } | |
512 | } | |
513 | } | |
514 | ||
cdaa9fc4 | 515 | /* This function fills a REAL(8) array with values from the uniform |
5f251c26 SK |
516 | distribution with range [0,1). */ |
517 | ||
518 | void | |
7d7b8bfe | 519 | arandom_r8 (gfc_array_r8 *x) |
5f251c26 | 520 | { |
e33e218b TK |
521 | index_type count[GFC_MAX_DIMENSIONS]; |
522 | index_type extent[GFC_MAX_DIMENSIONS]; | |
523 | index_type stride[GFC_MAX_DIMENSIONS]; | |
5f251c26 SK |
524 | index_type stride0; |
525 | index_type dim; | |
526 | GFC_REAL_8 *dest; | |
0e99e093 | 527 | prng_state* rs = get_rand_state(); |
5f251c26 | 528 | |
21d1335b | 529 | dest = x->base_addr; |
5f251c26 | 530 | |
5f251c26 SK |
531 | dim = GFC_DESCRIPTOR_RANK (x); |
532 | ||
7a157266 | 533 | for (index_type n = 0; n < dim; n++) |
5f251c26 SK |
534 | { |
535 | count[n] = 0; | |
dfb55fdc TK |
536 | stride[n] = GFC_DESCRIPTOR_STRIDE(x,n); |
537 | extent[n] = GFC_DESCRIPTOR_EXTENT(x,n); | |
5f251c26 SK |
538 | if (extent[n] <= 0) |
539 | return; | |
540 | } | |
541 | ||
542 | stride0 = stride[0]; | |
543 | ||
b152f5a2 JB |
544 | if (unlikely (!rs->init)) |
545 | init_rand_state (rs, false); | |
5e805e44 | 546 | |
5f251c26 SK |
547 | while (dest) |
548 | { | |
1b867ae7 | 549 | /* random_r8 (dest); */ |
0e99e093 | 550 | uint64_t r = prng_next (rs); |
b152f5a2 | 551 | rnumber_8 (dest, r); |
cdc5524f TK |
552 | |
553 | /* Advance to the next element. */ | |
554 | dest += stride0; | |
555 | count[0]++; | |
556 | /* Advance to the next source element. */ | |
7a157266 | 557 | index_type n = 0; |
cdc5524f TK |
558 | while (count[n] == extent[n]) |
559 | { | |
560 | /* When we get to the end of a dimension, reset it and increment | |
561 | the next dimension. */ | |
562 | count[n] = 0; | |
563 | /* We could precalculate these products, but this is a less | |
564 | frequently used path so probably not worth it. */ | |
565 | dest -= stride[n] * extent[n]; | |
566 | n++; | |
567 | if (n == dim) | |
568 | { | |
569 | dest = NULL; | |
570 | break; | |
571 | } | |
572 | else | |
573 | { | |
574 | count[n]++; | |
575 | dest += stride[n]; | |
576 | } | |
577 | } | |
578 | } | |
cdc5524f TK |
579 | } |
580 | ||
581 | #ifdef HAVE_GFC_REAL_10 | |
582 | ||
583 | /* This function fills a REAL(10) array with values from the uniform | |
584 | distribution with range [0,1). */ | |
585 | ||
586 | void | |
587 | arandom_r10 (gfc_array_r10 *x) | |
588 | { | |
589 | index_type count[GFC_MAX_DIMENSIONS]; | |
590 | index_type extent[GFC_MAX_DIMENSIONS]; | |
591 | index_type stride[GFC_MAX_DIMENSIONS]; | |
592 | index_type stride0; | |
593 | index_type dim; | |
594 | GFC_REAL_10 *dest; | |
0e99e093 | 595 | prng_state* rs = get_rand_state(); |
cdc5524f | 596 | |
21d1335b | 597 | dest = x->base_addr; |
cdc5524f TK |
598 | |
599 | dim = GFC_DESCRIPTOR_RANK (x); | |
600 | ||
7a157266 | 601 | for (index_type n = 0; n < dim; n++) |
cdc5524f TK |
602 | { |
603 | count[n] = 0; | |
dfb55fdc TK |
604 | stride[n] = GFC_DESCRIPTOR_STRIDE(x,n); |
605 | extent[n] = GFC_DESCRIPTOR_EXTENT(x,n); | |
cdc5524f TK |
606 | if (extent[n] <= 0) |
607 | return; | |
608 | } | |
609 | ||
610 | stride0 = stride[0]; | |
611 | ||
b152f5a2 JB |
612 | if (unlikely (!rs->init)) |
613 | init_rand_state (rs, false); | |
cdc5524f TK |
614 | |
615 | while (dest) | |
616 | { | |
1b867ae7 | 617 | /* random_r10 (dest); */ |
0e99e093 | 618 | uint64_t r = prng_next (rs); |
b152f5a2 | 619 | rnumber_10 (dest, r); |
cdc5524f TK |
620 | |
621 | /* Advance to the next element. */ | |
622 | dest += stride0; | |
623 | count[0]++; | |
624 | /* Advance to the next source element. */ | |
7a157266 | 625 | index_type n = 0; |
cdc5524f TK |
626 | while (count[n] == extent[n]) |
627 | { | |
628 | /* When we get to the end of a dimension, reset it and increment | |
629 | the next dimension. */ | |
630 | count[n] = 0; | |
631 | /* We could precalculate these products, but this is a less | |
632 | frequently used path so probably not worth it. */ | |
633 | dest -= stride[n] * extent[n]; | |
634 | n++; | |
635 | if (n == dim) | |
636 | { | |
637 | dest = NULL; | |
638 | break; | |
639 | } | |
640 | else | |
641 | { | |
642 | count[n]++; | |
643 | dest += stride[n]; | |
644 | } | |
645 | } | |
646 | } | |
cdc5524f TK |
647 | } |
648 | ||
649 | #endif | |
650 | ||
651 | #ifdef HAVE_GFC_REAL_16 | |
652 | ||
653 | /* This function fills a REAL(16) array with values from the uniform | |
654 | distribution with range [0,1). */ | |
655 | ||
656 | void | |
657 | arandom_r16 (gfc_array_r16 *x) | |
658 | { | |
659 | index_type count[GFC_MAX_DIMENSIONS]; | |
660 | index_type extent[GFC_MAX_DIMENSIONS]; | |
661 | index_type stride[GFC_MAX_DIMENSIONS]; | |
662 | index_type stride0; | |
663 | index_type dim; | |
664 | GFC_REAL_16 *dest; | |
0e99e093 | 665 | prng_state* rs = get_rand_state(); |
cdc5524f | 666 | |
21d1335b | 667 | dest = x->base_addr; |
cdc5524f TK |
668 | |
669 | dim = GFC_DESCRIPTOR_RANK (x); | |
670 | ||
7a157266 | 671 | for (index_type n = 0; n < dim; n++) |
cdc5524f TK |
672 | { |
673 | count[n] = 0; | |
dfb55fdc TK |
674 | stride[n] = GFC_DESCRIPTOR_STRIDE(x,n); |
675 | extent[n] = GFC_DESCRIPTOR_EXTENT(x,n); | |
cdc5524f TK |
676 | if (extent[n] <= 0) |
677 | return; | |
678 | } | |
679 | ||
680 | stride0 = stride[0]; | |
681 | ||
b152f5a2 JB |
682 | if (unlikely (!rs->init)) |
683 | init_rand_state (rs, false); | |
cdc5524f TK |
684 | |
685 | while (dest) | |
686 | { | |
1b867ae7 | 687 | /* random_r16 (dest); */ |
0e99e093 JB |
688 | uint64_t r1 = prng_next (rs); |
689 | uint64_t r2 = prng_next (rs); | |
b152f5a2 | 690 | rnumber_16 (dest, r1, r2); |
5f251c26 SK |
691 | |
692 | /* Advance to the next element. */ | |
693 | dest += stride0; | |
694 | count[0]++; | |
695 | /* Advance to the next source element. */ | |
7a157266 | 696 | index_type n = 0; |
5f251c26 SK |
697 | while (count[n] == extent[n]) |
698 | { | |
699 | /* When we get to the end of a dimension, reset it and increment | |
700 | the next dimension. */ | |
701 | count[n] = 0; | |
702 | /* We could precalculate these products, but this is a less | |
703 | frequently used path so probably not worth it. */ | |
704 | dest -= stride[n] * extent[n]; | |
705 | n++; | |
706 | if (n == dim) | |
707 | { | |
708 | dest = NULL; | |
709 | break; | |
710 | } | |
711 | else | |
712 | { | |
713 | count[n]++; | |
714 | dest += stride[n]; | |
715 | } | |
716 | } | |
717 | } | |
718 | } | |
719 | ||
cdc5524f TK |
720 | #endif |
721 | ||
2d3ca8b7 | 722 | |
09309e09 | 723 | /* Number of elements in master_state array. */ |
0eaee4ab | 724 | #define SZU64 (sizeof (master_state.s) / sizeof (uint64_t)) |
2d3ca8b7 | 725 | |
2d3ca8b7 | 726 | |
09309e09 | 727 | /* Keys for scrambling the seed in order to avoid poor seeds. */ |
2d3ca8b7 | 728 | |
09309e09 JB |
729 | static const uint64_t xor_keys[] = { |
730 | 0xbd0c5b6e50c2df49ULL, 0xd46061cd46e1df38ULL, 0xbb4f4d4ed6103544ULL, | |
0e99e093 | 731 | 0x114a583d0756ad39ULL |
09309e09 JB |
732 | }; |
733 | ||
734 | ||
735 | /* Since a XOR cipher is symmetric, we need only one routine, and we | |
736 | can use it both for encryption and decryption. */ | |
2d3ca8b7 FXC |
737 | |
738 | static void | |
09309e09 | 739 | scramble_seed (uint64_t *dest, const uint64_t *src) |
2d3ca8b7 | 740 | { |
7a157266 | 741 | for (size_t i = 0; i < SZU64; i++) |
09309e09 | 742 | dest[i] = src[i] ^ xor_keys[i]; |
2d3ca8b7 FXC |
743 | } |
744 | ||
745 | ||
7d7b8bfe | 746 | /* random_seed is used to seed the PRNG with either a default |
420aa7b8 | 747 | set of seeds or user specified set of seeds. random_seed |
5f251c26 SK |
748 | must be called with no argument or exactly one argument. */ |
749 | ||
750 | void | |
34b4bc5c | 751 | random_seed_i4 (GFC_INTEGER_4 *size, gfc_array_i4 *put, gfc_array_i4 *get) |
5f251c26 | 752 | { |
09309e09 | 753 | uint64_t seed[SZU64]; |
0eaee4ab | 754 | #define SZ (sizeof (master_state.s) / sizeof (GFC_INTEGER_4)) |
5e805e44 | 755 | |
34b4bc5c FXC |
756 | /* Check that we only have one argument present. */ |
757 | if ((size ? 1 : 0) + (put ? 1 : 0) + (get ? 1 : 0) > 1) | |
758 | runtime_error ("RANDOM_SEED should have at most one argument present."); | |
cdc5524f | 759 | |
b152f5a2 | 760 | if (size != NULL) |
0e99e093 | 761 | *size = SZ; |
b152f5a2 | 762 | |
0e99e093 | 763 | prng_state* rs = get_rand_state(); |
b152f5a2 JB |
764 | |
765 | /* Return the seed to GET data. */ | |
766 | if (get != NULL) | |
767 | { | |
768 | /* If the rank of the array is not 1, abort. */ | |
769 | if (GFC_DESCRIPTOR_RANK (get) != 1) | |
770 | runtime_error ("Array rank of GET is not 1."); | |
771 | ||
772 | /* If the array is too small, abort. */ | |
0e99e093 | 773 | if (GFC_DESCRIPTOR_EXTENT(get,0) < (index_type) SZ) |
b152f5a2 JB |
774 | runtime_error ("Array size of GET is too small."); |
775 | ||
776 | if (!rs->init) | |
777 | init_rand_state (rs, false); | |
778 | ||
779 | /* Unscramble the seed. */ | |
09309e09 | 780 | scramble_seed (seed, rs->s); |
b152f5a2 JB |
781 | |
782 | /* Then copy it back to the user variable. */ | |
783 | for (size_t i = 0; i < SZ ; i++) | |
784 | memcpy (&(get->base_addr[(SZ - 1 - i) * GFC_DESCRIPTOR_STRIDE(get,0)]), | |
09309e09 | 785 | (unsigned char*) seed + i * sizeof(GFC_UINTEGER_4), |
b152f5a2 | 786 | sizeof(GFC_UINTEGER_4)); |
b152f5a2 JB |
787 | } |
788 | ||
789 | else | |
790 | { | |
791 | __gthread_mutex_lock (&random_lock); | |
792 | ||
34b4bc5c FXC |
793 | /* From the standard: "If no argument is present, the processor assigns |
794 | a processor-dependent value to the seed." */ | |
795 | if (size == NULL && put == NULL && get == NULL) | |
b152f5a2 | 796 | { |
0eaee4ab | 797 | master_state.init = false; |
b152f5a2 JB |
798 | init_rand_state (rs, true); |
799 | } | |
5f251c26 SK |
800 | |
801 | if (put != NULL) | |
802 | { | |
cdaa9fc4 | 803 | /* If the rank of the array is not 1, abort. */ |
5f251c26 SK |
804 | if (GFC_DESCRIPTOR_RANK (put) != 1) |
805 | runtime_error ("Array rank of PUT is not 1."); | |
806 | ||
cdaa9fc4 | 807 | /* If the array is too small, abort. */ |
0e99e093 | 808 | if (GFC_DESCRIPTOR_EXTENT(put,0) < (index_type) SZ) |
5f251c26 SK |
809 | runtime_error ("Array size of PUT is too small."); |
810 | ||
2d3ca8b7 | 811 | /* We copy the seed given by the user. */ |
b152f5a2 | 812 | for (size_t i = 0; i < SZ; i++) |
09309e09 | 813 | memcpy ((unsigned char*) seed + i * sizeof(GFC_UINTEGER_4), |
b152f5a2 | 814 | &(put->base_addr[(SZ - 1 - i) * GFC_DESCRIPTOR_STRIDE(put,0)]), |
2d3ca8b7 FXC |
815 | sizeof(GFC_UINTEGER_4)); |
816 | ||
817 | /* We put it after scrambling the bytes, to paper around users who | |
818 | provide seeds with quality only in the lower or upper part. */ | |
0eaee4ab JB |
819 | scramble_seed (master_state.s, seed); |
820 | master_state.init = true; | |
b152f5a2 | 821 | init_rand_state (rs, true); |
b152f5a2 | 822 | } |
5f251c26 | 823 | |
5e805e44 | 824 | __gthread_mutex_unlock (&random_lock); |
b152f5a2 JB |
825 | } |
826 | #undef SZ | |
5f251c26 | 827 | } |
34b4bc5c FXC |
828 | iexport(random_seed_i4); |
829 | ||
830 | ||
831 | void | |
832 | random_seed_i8 (GFC_INTEGER_8 *size, gfc_array_i8 *put, gfc_array_i8 *get) | |
833 | { | |
09309e09 JB |
834 | uint64_t seed[SZU64]; |
835 | ||
34b4bc5c FXC |
836 | /* Check that we only have one argument present. */ |
837 | if ((size ? 1 : 0) + (put ? 1 : 0) + (get ? 1 : 0) > 1) | |
838 | runtime_error ("RANDOM_SEED should have at most one argument present."); | |
839 | ||
0eaee4ab | 840 | #define SZ (sizeof (master_state.s) / sizeof (GFC_INTEGER_8)) |
34b4bc5c | 841 | if (size != NULL) |
0e99e093 | 842 | *size = SZ; |
34b4bc5c | 843 | |
0e99e093 | 844 | prng_state* rs = get_rand_state(); |
b152f5a2 JB |
845 | |
846 | /* Return the seed to GET data. */ | |
847 | if (get != NULL) | |
34b4bc5c FXC |
848 | { |
849 | /* If the rank of the array is not 1, abort. */ | |
b152f5a2 JB |
850 | if (GFC_DESCRIPTOR_RANK (get) != 1) |
851 | runtime_error ("Array rank of GET is not 1."); | |
34b4bc5c FXC |
852 | |
853 | /* If the array is too small, abort. */ | |
0e99e093 | 854 | if (GFC_DESCRIPTOR_EXTENT(get,0) < (index_type) SZ) |
b152f5a2 JB |
855 | runtime_error ("Array size of GET is too small."); |
856 | ||
857 | if (!rs->init) | |
858 | init_rand_state (rs, false); | |
34b4bc5c | 859 | |
09309e09 JB |
860 | /* Unscramble the seed. */ |
861 | scramble_seed (seed, rs->s); | |
862 | ||
34b4bc5c | 863 | /* This code now should do correct strides. */ |
b152f5a2 | 864 | for (size_t i = 0; i < SZ; i++) |
09309e09 | 865 | memcpy (&(get->base_addr[i * GFC_DESCRIPTOR_STRIDE(get,0)]), &seed[i], |
34b4bc5c FXC |
866 | sizeof (GFC_UINTEGER_8)); |
867 | } | |
868 | ||
b152f5a2 JB |
869 | else |
870 | { | |
871 | __gthread_mutex_lock (&random_lock); | |
872 | ||
873 | /* From the standard: "If no argument is present, the processor assigns | |
874 | a processor-dependent value to the seed." */ | |
875 | if (size == NULL && put == NULL && get == NULL) | |
876 | { | |
0eaee4ab | 877 | master_state.init = false; |
b152f5a2 JB |
878 | init_rand_state (rs, true); |
879 | } | |
880 | ||
881 | if (put != NULL) | |
34b4bc5c FXC |
882 | { |
883 | /* If the rank of the array is not 1, abort. */ | |
b152f5a2 JB |
884 | if (GFC_DESCRIPTOR_RANK (put) != 1) |
885 | runtime_error ("Array rank of PUT is not 1."); | |
34b4bc5c FXC |
886 | |
887 | /* If the array is too small, abort. */ | |
0e99e093 | 888 | if (GFC_DESCRIPTOR_EXTENT(put,0) < (index_type) SZ) |
b152f5a2 | 889 | runtime_error ("Array size of PUT is too small."); |
34b4bc5c FXC |
890 | |
891 | /* This code now should do correct strides. */ | |
b152f5a2 | 892 | for (size_t i = 0; i < SZ; i++) |
09309e09 | 893 | memcpy (&seed[i], &(put->base_addr[i * GFC_DESCRIPTOR_STRIDE(put,0)]), |
34b4bc5c | 894 | sizeof (GFC_UINTEGER_8)); |
b152f5a2 | 895 | |
0eaee4ab JB |
896 | scramble_seed (master_state.s, seed); |
897 | master_state.init = true; | |
b152f5a2 | 898 | init_rand_state (rs, true); |
b152f5a2 JB |
899 | } |
900 | ||
34b4bc5c FXC |
901 | |
902 | __gthread_mutex_unlock (&random_lock); | |
b152f5a2 | 903 | } |
34b4bc5c FXC |
904 | } |
905 | iexport(random_seed_i8); | |
7d7b8bfe | 906 | |
5e805e44 | 907 | |
b152f5a2 | 908 | #if !defined __GTHREAD_MUTEX_INIT || defined __GTHREADS |
5e805e44 | 909 | static void __attribute__((constructor)) |
b152f5a2 | 910 | constructor_random (void) |
5e805e44 | 911 | { |
b152f5a2 | 912 | #ifndef __GTHREAD_MUTEX_INIT |
5e805e44 | 913 | __GTHREAD_MUTEX_INIT_FUNCTION (&random_lock); |
b152f5a2 JB |
914 | #endif |
915 | if (__gthread_active_p ()) | |
916 | __gthread_key_create (&rand_state_key, &free); | |
917 | } | |
918 | #endif | |
919 | ||
920 | #ifdef __GTHREADS | |
921 | static void __attribute__((destructor)) | |
922 | destructor_random (void) | |
923 | { | |
924 | if (__gthread_active_p ()) | |
925 | __gthread_key_delete (rand_state_key); | |
5e805e44 JJ |
926 | } |
927 | #endif |