i686-w64-mingw32/bits/opt_random.h

// Optimizations for random number functions, x86 version -*- C++ -*-

// Copyright (C) 2012-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/** @file bits/opt_random.h
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly. @headername{random}
 */

#ifndef _BITS_OPT_RANDOM_H
#define _BITS_OPT_RANDOM_H 1

#ifdef __SSE3__
#include <pmmintrin.h>
#endif


#pragma GCC system_header


namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

#ifdef __SSE3__
  template<>
    template<typename _UniformRandomNumberGenerator>
      void
      normal_distribution<double>::
      __generate(typename normal_distribution<double>::result_type* __f,
                 typename normal_distribution<double>::result_type* __t,
                 _UniformRandomNumberGenerator& __urng,
                 const param_type& __param)
      {
        typedef uint64_t __uctype;

        if (__f == __t)
          return;

        if (_M_saved_available)
          {
            _M_saved_available = false;
            *__f++ = _M_saved * __param.stddev() + __param.mean();

            if (__f == __t)
              return;
          }

        constexpr uint64_t __maskval = 0xfffffffffffffull;
        static const __m128i __mask = _mm_set1_epi64x(__maskval);
        static const __m128i __two = _mm_set1_epi64x(0x4000000000000000ull);
        static const __m128d __three = _mm_set1_pd(3.0);
        const __m128d __av = _mm_set1_pd(__param.mean());

        const __uctype __urngmin = __urng.min();
        const __uctype __urngmax = __urng.max();
        const __uctype __urngrange = __urngmax - __urngmin;
        const __uctype __uerngrange = __urngrange + 1;

        while (__f + 1 < __t)
          {
            double __le;
            __m128d __x;
            do
              {
                union
                {
                  __m128i __i;
                  __m128d __d;
                } __v;

                if (__urngrange > __maskval)
                  {
                    if (__detail::_Power_of_2(__uerngrange))
                      __v.__i = _mm_and_si128(_mm_set_epi64x(__urng(),
                                                             __urng()),
                                              __mask);
                    else
                      {
                        const __uctype __uerange = __maskval + 1;
                        const __uctype __scaling = __urngrange / __uerange;
                        const __uctype __past = __uerange * __scaling;
                        uint64_t __v1;
                        do
                          __v1 = __uctype(__urng()) - __urngmin;
                        while (__v1 >= __past);
                        __v1 /= __scaling;
                        uint64_t __v2;
                        do
                          __v2 = __uctype(__urng()) - __urngmin;
                        while (__v2 >= __past);
                        __v2 /= __scaling;

                        __v.__i = _mm_set_epi64x(__v1, __v2);
                      }
                  }
                else if (__urngrange == __maskval)
                  __v.__i = _mm_set_epi64x(__urng(), __urng());
                else if ((__urngrange + 2) * __urngrange >= __maskval
                         && __detail::_Power_of_2(__uerngrange))
                  {
                    uint64_t __v1 = __urng() * __uerngrange + __urng();
                    uint64_t __v2 = __urng() * __uerngrange + __urng();

                    __v.__i = _mm_and_si128(_mm_set_epi64x(__v1, __v2),
                                            __mask);
                  }
                else
                  {
                    size_t __nrng = 2;
                    __uctype __high = __maskval / __uerngrange / __uerngrange;
                    while (__high > __uerngrange)
                      {
                        ++__nrng;
                        __high /= __uerngrange;
                      }
                    const __uctype __highrange = __high + 1;
                    const __uctype __scaling = __urngrange / __highrange;
                    const __uctype __past = __highrange * __scaling;
                    __uctype __tmp;

                    uint64_t __v1;
                    do
                      {
                        do
                          __tmp = __uctype(__urng()) - __urngmin;
                        while (__tmp >= __past);
                        __v1 = __tmp / __scaling;
                        for (size_t __cnt = 0; __cnt < __nrng; ++__cnt)
                          {
                            __tmp = __v1;
                            __v1 *= __uerngrange;
                            __v1 += __uctype(__urng()) - __urngmin;
                          }
                      }
                    while (__v1 > __maskval || __v1 < __tmp);

                    uint64_t __v2;
                    do
                      {
                        do
                          __tmp = __uctype(__urng()) - __urngmin;
                        while (__tmp >= __past);
                        __v2 = __tmp / __scaling;
                        for (size_t __cnt = 0; __cnt < __nrng; ++__cnt)
                          {
                            __tmp = __v2;
                            __v2 *= __uerngrange;
                            __v2 += __uctype(__urng()) - __urngmin;
                          }
                      }
                    while (__v2 > __maskval || __v2 < __tmp);

                    __v.__i = _mm_set_epi64x(__v1, __v2);
                  }

                __v.__i = _mm_or_si128(__v.__i, __two);
                __x = _mm_sub_pd(__v.__d, __three);
                __m128d __m = _mm_mul_pd(__x, __x);
                __le = _mm_cvtsd_f64(_mm_hadd_pd (__m, __m));
              }
            while (__le == 0.0 || __le >= 1.0);

            double __mult = (std::sqrt(-2.0 * std::log(__le) / __le)
                             * __param.stddev());

            __x = _mm_add_pd(_mm_mul_pd(__x, _mm_set1_pd(__mult)), __av);

            _mm_storeu_pd(__f, __x);
            __f += 2;
          }

        if (__f != __t)
          {
            result_type __x, __y, __r2;

            __detail::_Adaptor<_UniformRandomNumberGenerator, result_type>
              __aurng(__urng);

            do
              {
                __x = result_type(2.0) * __aurng() - 1.0;
                __y = result_type(2.0) * __aurng() - 1.0;
                __r2 = __x * __x + __y * __y;
              }
            while (__r2 > 1.0 || __r2 == 0.0);

            const result_type __mult = std::sqrt(-2 * std::log(__r2) / __r2);
            _M_saved = __x * __mult;
            _M_saved_available = true;
            *__f = __y * __mult * __param.stddev() + __param.mean();
          }
      }
#endif


_GLIBCXX_END_NAMESPACE_VERSION
} // namespace


#endif // _BITS_OPT_RANDOM_H
Revision:	1166
Committed:	Tue Oct 26 14:22:36 2021 UTC (4 years ago) by rossy
Content type:	text/x-chdr
File size:	6194 byte(s)
Log Message:	Daodan: Replace MinGW build env with an up-to-date MSYS2 env
#	Content
1	// Optimizations for random number functions, x86 version -- C++ --
2
3	// Copyright (C) 2012-2021 Free Software Foundation, Inc.
4	//
5	// This file is part of the GNU ISO C++ Library. This library is free
6	// software; you can redistribute it and/or modify it under the
7	// terms of the GNU General Public License as published by the
8	// Free Software Foundation; either version 3, or (at your option)
9	// any later version.
10
11	// This library is distributed in the hope that it will be useful,
12	// but WITHOUT ANY WARRANTY; without even the implied warranty of
13	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14	// GNU General Public License for more details.
15
16	// Under Section 7 of GPL version 3, you are granted additional
17	// permissions described in the GCC Runtime Library Exception, version
18	// 3.1, as published by the Free Software Foundation.
19
20	// You should have received a copy of the GNU General Public License and
21	// a copy of the GCC Runtime Library Exception along with this program;
22	// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23	// <http://www.gnu.org/licenses/>.
24
25	/** @file bits/opt_random.h
26	* This is an internal header file, included by other library headers.
27	* Do not attempt to use it directly. @headername{random}
28	*/
29
30	#ifndef _BITS_OPT_RANDOM_H
31	#define _BITS_OPT_RANDOM_H 1
32
33	#ifdef __SSE3__
34	#include <pmmintrin.h>
35	#endif
36
37
38	#pragma GCC system_header
39
40
41	namespace std _GLIBCXX_VISIBILITY(default)
42	{
43	_GLIBCXX_BEGIN_NAMESPACE_VERSION
44
45	#ifdef __SSE3__
46	template<>
47	template<typename _UniformRandomNumberGenerator>
48	void
49	normal_distribution<double>::
50	__generate(typename normal_distribution<double>::result_type* __f,
51	typename normal_distribution<double>::result_type* __t,
52	_UniformRandomNumberGenerator& __urng,
53	const param_type& __param)
54	{
55	typedef uint64_t __uctype;
56
57	if (__f == __t)
58	return;
59
60	if (_M_saved_available)
61	{
62	_M_saved_available = false;
63	__f++ = _M_saved __param.stddev() + __param.mean();
64
65	if (__f == __t)
66	return;
67	}
68
69	constexpr uint64_t __maskval = 0xfffffffffffffull;
70	static const __m128i __mask = _mm_set1_epi64x(__maskval);
71	static const __m128i __two = _mm_set1_epi64x(0x4000000000000000ull);
72	static const __m128d __three = _mm_set1_pd(3.0);
73	const __m128d __av = _mm_set1_pd(__param.mean());
74
75	const __uctype __urngmin = __urng.min();
76	const __uctype __urngmax = __urng.max();
77	const __uctype __urngrange = __urngmax - __urngmin;
78	const __uctype __uerngrange = __urngrange + 1;
79
80	while (__f + 1 < __t)
81	{
82	double __le;
83	__m128d __x;
84	do
85	{
86	union
87	{
88	__m128i __i;
89	__m128d __d;
90	} __v;
91
92	if (__urngrange > __maskval)
93	{
94	if (__detail::_Power_of_2(__uerngrange))
95	__v.__i = _mm_and_si128(_mm_set_epi64x(__urng(),
96	__urng()),
97	__mask);
98	else
99	{
100	const __uctype __uerange = __maskval + 1;
101	const __uctype __scaling = __urngrange / __uerange;
102	const __uctype __past = __uerange * __scaling;
103	uint64_t __v1;
104	do
105	__v1 = __uctype(__urng()) - __urngmin;
106	while (__v1 >= __past);
107	__v1 /= __scaling;
108	uint64_t __v2;
109	do
110	__v2 = __uctype(__urng()) - __urngmin;
111	while (__v2 >= __past);
112	__v2 /= __scaling;
113
114	__v.__i = _mm_set_epi64x(__v1, __v2);
115	}
116	}
117	else if (__urngrange == __maskval)
118	__v.__i = _mm_set_epi64x(__urng(), __urng());
119	else if ((__urngrange + 2) * __urngrange >= __maskval
120	&& __detail::_Power_of_2(__uerngrange))
121	{
122	uint64_t __v1 = __urng() * __uerngrange + __urng();
123	uint64_t __v2 = __urng() * __uerngrange + __urng();
124
125	__v.__i = _mm_and_si128(_mm_set_epi64x(__v1, __v2),
126	__mask);
127	}
128	else
129	{
130	size_t __nrng = 2;
131	__uctype __high = __maskval / __uerngrange / __uerngrange;
132	while (__high > __uerngrange)
133	{
134	++__nrng;
135	__high /= __uerngrange;
136	}
137	const __uctype __highrange = __high + 1;
138	const __uctype __scaling = __urngrange / __highrange;
139	const __uctype __past = __highrange * __scaling;
140	__uctype __tmp;
141
142	uint64_t __v1;
143	do
144	{
145	do
146	__tmp = __uctype(__urng()) - __urngmin;
147	while (__tmp >= __past);
148	__v1 = __tmp / __scaling;
149	for (size_t __cnt = 0; __cnt < __nrng; ++__cnt)
150	{
151	__tmp = __v1;
152	__v1 *= __uerngrange;
153	__v1 += __uctype(__urng()) - __urngmin;
154	}
155	}
156	while (__v1 > __maskval \|\| __v1 < __tmp);
157
158	uint64_t __v2;
159	do
160	{
161	do
162	__tmp = __uctype(__urng()) - __urngmin;
163	while (__tmp >= __past);
164	__v2 = __tmp / __scaling;
165	for (size_t __cnt = 0; __cnt < __nrng; ++__cnt)
166	{
167	__tmp = __v2;
168	__v2 *= __uerngrange;
169	__v2 += __uctype(__urng()) - __urngmin;
170	}
171	}
172	while (__v2 > __maskval \|\| __v2 < __tmp);
173
174	__v.__i = _mm_set_epi64x(__v1, __v2);
175	}
176
177	__v.__i = _mm_or_si128(__v.__i, __two);
178	__x = _mm_sub_pd(__v.__d, __three);
179	__m128d __m = _mm_mul_pd(__x, __x);
180	__le = _mm_cvtsd_f64(_mm_hadd_pd (__m, __m));
181	}
182	while (__le == 0.0 \|\| __le >= 1.0);
183
184	double __mult = (std::sqrt(-2.0 * std::log(__le) / __le)
185	* __param.stddev());
186
187	__x = _mm_add_pd(_mm_mul_pd(__x, _mm_set1_pd(__mult)), __av);
188
189	_mm_storeu_pd(__f, __x);
190	__f += 2;
191	}
192
193	if (__f != __t)
194	{
195	result_type __x, __y, __r2;
196
197	__detail::_Adaptor<_UniformRandomNumberGenerator, result_type>
198	__aurng(__urng);
199
200	do
201	{
202	__x = result_type(2.0) * __aurng() - 1.0;
203	__y = result_type(2.0) * __aurng() - 1.0;
204	__r2 = __x * __x + __y * __y;
205	}
206	while (__r2 > 1.0 \|\| __r2 == 0.0);
207
208	const result_type __mult = std::sqrt(-2 * std::log(__r2) / __r2);
209	_M_saved = __x * __mult;
210	_M_saved_available = true;
211	__f = __y __mult * __param.stddev() + __param.mean();
212	}
213	}
214	#endif
215
216
217	_GLIBCXX_END_NAMESPACE_VERSION
218	} // namespace
219
220
221	#endif // _BITS_OPT_RANDOM_H