1 // Optimizations for random number functions, x86 version -*- C++ -*-
3 // Copyright (C) 2012-2020 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
25 /** @file bits/opt_random.h
26 * This is an internal header file, included by other library headers.
27 * Do not attempt to use it directly. @headername{random}
30 #ifndef _BITS_OPT_RANDOM_H
31 #define _BITS_OPT_RANDOM_H 1
34 #include <pmmintrin.h>
38 #pragma GCC system_header
41 namespace std
_GLIBCXX_VISIBILITY(default)
43 _GLIBCXX_BEGIN_NAMESPACE_VERSION
47 template<typename _UniformRandomNumberGenerator
>
49 normal_distribution
<double>::
50 __generate(typename normal_distribution
<double>::result_type
* __f
,
51 typename normal_distribution
<double>::result_type
* __t
,
52 _UniformRandomNumberGenerator
& __urng
,
53 const param_type
& __param
)
55 typedef uint64_t __uctype
;
60 if (_M_saved_available
)
62 _M_saved_available
= false;
63 *__f
++ = _M_saved
* __param
.stddev() + __param
.mean();
69 constexpr uint64_t __maskval
= 0xfffffffffffffull
;
70 static const __m128i __mask
= _mm_set1_epi64x(__maskval
);
71 static const __m128i __two
= _mm_set1_epi64x(0x4000000000000000ull
);
72 static const __m128d __three
= _mm_set1_pd(3.0);
73 const __m128d __av
= _mm_set1_pd(__param
.mean());
75 const __uctype __urngmin
= __urng
.min();
76 const __uctype __urngmax
= __urng
.max();
77 const __uctype __urngrange
= __urngmax
- __urngmin
;
78 const __uctype __uerngrange
= __urngrange
+ 1;
92 if (__urngrange
> __maskval
)
94 if (__detail::_Power_of_2(__uerngrange
))
95 __v
.__i
= _mm_and_si128(_mm_set_epi64x(__urng(),
100 const __uctype __uerange
= __maskval
+ 1;
101 const __uctype __scaling
= __urngrange
/ __uerange
;
102 const __uctype __past
= __uerange
* __scaling
;
105 __v1
= __uctype(__urng()) - __urngmin
;
106 while (__v1
>= __past
);
110 __v2
= __uctype(__urng()) - __urngmin
;
111 while (__v2
>= __past
);
114 __v
.__i
= _mm_set_epi64x(__v1
, __v2
);
117 else if (__urngrange
== __maskval
)
118 __v
.__i
= _mm_set_epi64x(__urng(), __urng());
119 else if ((__urngrange
+ 2) * __urngrange
>= __maskval
120 && __detail::_Power_of_2(__uerngrange
))
122 uint64_t __v1
= __urng() * __uerngrange
+ __urng();
123 uint64_t __v2
= __urng() * __uerngrange
+ __urng();
125 __v
.__i
= _mm_and_si128(_mm_set_epi64x(__v1
, __v2
),
131 __uctype __high
= __maskval
/ __uerngrange
/ __uerngrange
;
132 while (__high
> __uerngrange
)
135 __high
/= __uerngrange
;
137 const __uctype __highrange
= __high
+ 1;
138 const __uctype __scaling
= __urngrange
/ __highrange
;
139 const __uctype __past
= __highrange
* __scaling
;
146 __tmp
= __uctype(__urng()) - __urngmin
;
147 while (__tmp
>= __past
);
148 __v1
= __tmp
/ __scaling
;
149 for (size_t __cnt
= 0; __cnt
< __nrng
; ++__cnt
)
152 __v1
*= __uerngrange
;
153 __v1
+= __uctype(__urng()) - __urngmin
;
156 while (__v1
> __maskval
|| __v1
< __tmp
);
162 __tmp
= __uctype(__urng()) - __urngmin
;
163 while (__tmp
>= __past
);
164 __v2
= __tmp
/ __scaling
;
165 for (size_t __cnt
= 0; __cnt
< __nrng
; ++__cnt
)
168 __v2
*= __uerngrange
;
169 __v2
+= __uctype(__urng()) - __urngmin
;
172 while (__v2
> __maskval
|| __v2
< __tmp
);
174 __v
.__i
= _mm_set_epi64x(__v1
, __v2
);
177 __v
.__i
= _mm_or_si128(__v
.__i
, __two
);
178 __x
= _mm_sub_pd(__v
.__d
, __three
);
179 __m128d __m
= _mm_mul_pd(__x
, __x
);
180 __le
= _mm_cvtsd_f64(_mm_hadd_pd (__m
, __m
));
182 while (__le
== 0.0 || __le
>= 1.0);
184 double __mult
= (std::sqrt(-2.0 * std::log(__le
) / __le
)
187 __x
= _mm_add_pd(_mm_mul_pd(__x
, _mm_set1_pd(__mult
)), __av
);
189 _mm_storeu_pd(__f
, __x
);
195 result_type __x
, __y
, __r2
;
197 __detail::_Adaptor
<_UniformRandomNumberGenerator
, result_type
>
202 __x
= result_type(2.0) * __aurng() - 1.0;
203 __y
= result_type(2.0) * __aurng() - 1.0;
204 __r2
= __x
* __x
+ __y
* __y
;
206 while (__r2
> 1.0 || __r2
== 0.0);
208 const result_type __mult
= std::sqrt(-2 * std::log(__r2
) / __r2
);
209 _M_saved
= __x
* __mult
;
210 _M_saved_available
= true;
211 *__f
= __y
* __mult
* __param
.stddev() + __param
.mean();
217 _GLIBCXX_END_NAMESPACE_VERSION
221 #endif // _BITS_OPT_RANDOM_H