1 // Optimizations for random number functions, x86 version -*- C++ -*-
3 // Copyright (C) 2012 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
25 /** @file bits/opt_random.h
26 * This is an internal header file, included by other library headers.
27 * Do not attempt to use it directly. @headername{random}
30 #ifndef _BITS_OPT_RANDOM_H
31 #define _BITS_OPT_RANDOM_H 1
33 #include <x86intrin.h>
36 #pragma GCC system_header
39 namespace std
_GLIBCXX_VISIBILITY(default)
41 _GLIBCXX_BEGIN_NAMESPACE_VERSION
45 template<typename _UniformRandomNumberGenerator
>
47 normal_distribution
<double>::
48 __generate(typename normal_distribution
<double>::result_type
* __f
,
49 typename normal_distribution
<double>::result_type
* __t
,
50 _UniformRandomNumberGenerator
& __urng
,
51 const param_type
& __param
)
53 typedef uint64_t __uctype
;
58 if (_M_saved_available
)
60 _M_saved_available
= false;
61 *__f
++ = _M_saved
* __param
.stddev() + __param
.mean();
67 constexpr uint64_t __maskval
= UINT64_C(0xfffffffffffff);
68 static const __m128i __mask
= _mm_set1_epi64x(__maskval
);
69 static const __m128i __two
= _mm_set1_epi64x(0x4000000000000000ull
);
70 static const __m128d __three
= _mm_set1_pd(3.0);
71 const __m128d __av
= _mm_set1_pd(__param
.mean());
73 const __uctype __urngmin
= __urng
.min();
74 const __uctype __urngmax
= __urng
.max();
75 const __uctype __urngrange
= __urngmax
- __urngmin
;
76 const __uctype __uerngrange
= __urngrange
+ 1;
90 if (__urngrange
> __maskval
)
92 if (__detail::_Power_of_2(__uerngrange
))
93 __v
.__i
= _mm_and_si128(_mm_set_epi64x(__urng(),
98 const __uctype __uerange
= __maskval
+ 1;
99 const __uctype __scaling
= __urngrange
/ __uerange
;
100 const __uctype __past
= __uerange
* __scaling
;
103 __v1
= __uctype(__urng()) - __urngmin
;
104 while (__v1
>= __past
);
108 __v2
= __uctype(__urng()) - __urngmin
;
109 while (__v2
>= __past
);
112 __v
.__i
= _mm_set_epi64x(__v1
, __v2
);
115 else if (__urngrange
== __maskval
)
116 __v
.__i
= _mm_set_epi64x(__urng(), __urng());
117 else if ((__urngrange
+ 2) * __urngrange
>= __maskval
118 && __detail::_Power_of_2(__uerngrange
))
120 uint64_t __v1
= __urng() * __uerngrange
+ __urng();
121 uint64_t __v2
= __urng() * __uerngrange
+ __urng();
123 __v
.__i
= _mm_and_si128(_mm_set_epi64x(__v1
, __v2
),
129 __uctype __high
= __maskval
/ __uerngrange
/ __uerngrange
;
130 while (__high
> __uerngrange
)
133 __high
/= __uerngrange
;
135 const __uctype __highrange
= __high
+ 1;
136 const __uctype __scaling
= __urngrange
/ __highrange
;
137 const __uctype __past
= __highrange
* __scaling
;
144 __tmp
= __uctype(__urng()) - __urngmin
;
145 while (__tmp
>= __past
);
146 __v1
= __tmp
/ __scaling
;
147 for (size_t __cnt
= 0; __cnt
< __nrng
; ++__cnt
)
150 __v1
*= __uerngrange
;
151 __v1
+= __uctype(__urng()) - __urngmin
;
154 while (__v1
> __maskval
|| __v1
< __tmp
);
160 __tmp
= __uctype(__urng()) - __urngmin
;
161 while (__tmp
>= __past
);
162 __v2
= __tmp
/ __scaling
;
163 for (size_t __cnt
= 0; __cnt
< __nrng
; ++__cnt
)
166 __v2
*= __uerngrange
;
167 __v2
+= __uctype(__urng()) - __urngmin
;
170 while (__v2
> __maskval
|| __v2
< __tmp
);
172 __v
.__i
= _mm_set_epi64x(__v1
, __v2
);
175 __v
.__i
= _mm_or_si128(__v
.__i
, __two
);
176 __x
= _mm_sub_pd(__v
.__d
, __three
);
177 __m128d __m
= _mm_mul_pd(__x
, __x
);
178 __le
= _mm_cvtsd_f64(_mm_hadd_pd (__m
, __m
));
180 while (__le
== 0.0 || __le
>= 1.0);
182 double __mult
= (std::sqrt(-2.0 * std::log(__le
) / __le
)
185 __x
= _mm_add_pd(_mm_mul_pd(__x
, _mm_set1_pd(__mult
)), __av
);
187 _mm_storeu_pd(__f
, __x
);
193 result_type __x
, __y
, __r2
;
195 __detail::_Adaptor
<_UniformRandomNumberGenerator
, result_type
>
200 __x
= result_type(2.0) * __aurng() - 1.0;
201 __y
= result_type(2.0) * __aurng() - 1.0;
202 __r2
= __x
* __x
+ __y
* __y
;
204 while (__r2
> 1.0 || __r2
== 0.0);
206 const result_type __mult
= std::sqrt(-2 * std::log(__r2
) / __r2
);
207 _M_saved
= __x
* __mult
;
208 _M_saved_available
= true;
209 *__f
= __y
* __mult
* __param
.stddev() + __param
.mean();
215 _GLIBCXX_END_NAMESPACE_VERSION
219 #endif // _BITS_OPT_RANDOM_H