| 1 |
// -*- C++ -*- |
| 2 |
|
| 3 |
// Copyright (C) 2007-2021 Free Software Foundation, Inc. |
| 4 |
// |
| 5 |
// This file is part of the GNU ISO C++ Library. This library is free |
| 6 |
// software; you can redistribute it and/or modify it under the terms |
| 7 |
// of the GNU General Public License as published by the Free Software |
| 8 |
// Foundation; either version 3, or (at your option) any later |
| 9 |
// version. |
| 10 |
|
| 11 |
// This library is distributed in the hope that it will be useful, but |
| 12 |
// WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 |
// General Public License for more details. |
| 15 |
|
| 16 |
// Under Section 7 of GPL version 3, you are granted additional |
| 17 |
// permissions described in the GCC Runtime Library Exception, version |
| 18 |
// 3.1, as published by the Free Software Foundation. |
| 19 |
|
| 20 |
// You should have received a copy of the GNU General Public License and |
| 21 |
// a copy of the GCC Runtime Library Exception along with this program; |
| 22 |
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| 23 |
// <http://www.gnu.org/licenses/>. |
| 24 |
|
| 25 |
/** @file parallel/random_shuffle.h |
| 26 |
* @brief Parallel implementation of std::random_shuffle(). |
| 27 |
* This file is a GNU parallel extension to the Standard C++ Library. |
| 28 |
*/ |
| 29 |
|
| 30 |
// Written by Johannes Singler. |
| 31 |
|
| 32 |
#ifndef _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H |
| 33 |
#define _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H 1 |
| 34 |
|
| 35 |
#include <limits> |
| 36 |
#include <bits/stl_numeric.h> |
| 37 |
#include <parallel/parallel.h> |
| 38 |
#include <parallel/random_number.h> |
| 39 |
|
| 40 |
namespace __gnu_parallel |
| 41 |
{ |
| 42 |
/** @brief Type to hold the index of a bin. |
| 43 |
* |
| 44 |
* Since many variables of this type are allocated, it should be |
| 45 |
* chosen as small as possible. |
| 46 |
*/ |
| 47 |
typedef unsigned short _BinIndex; |
| 48 |
|
| 49 |
/** @brief Data known to every thread participating in |
| 50 |
__gnu_parallel::__parallel_random_shuffle(). */ |
| 51 |
template<typename _RAIter> |
| 52 |
struct _DRandomShufflingGlobalData |
| 53 |
{ |
| 54 |
typedef std::iterator_traits<_RAIter> _TraitsType; |
| 55 |
typedef typename _TraitsType::value_type _ValueType; |
| 56 |
typedef typename _TraitsType::difference_type _DifferenceType; |
| 57 |
|
| 58 |
/** @brief Begin iterator of the __source. */ |
| 59 |
_RAIter& _M_source; |
| 60 |
|
| 61 |
/** @brief Temporary arrays for each thread. */ |
| 62 |
_ValueType** _M_temporaries; |
| 63 |
|
| 64 |
/** @brief Two-dimensional array to hold the thread-bin distribution. |
| 65 |
* |
| 66 |
* Dimensions (_M_num_threads + 1) __x (_M_num_bins + 1). */ |
| 67 |
_DifferenceType** _M_dist; |
| 68 |
|
| 69 |
/** @brief Start indexes of the threads' __chunks. */ |
| 70 |
_DifferenceType* _M_starts; |
| 71 |
|
| 72 |
/** @brief Number of the thread that will further process the |
| 73 |
corresponding bin. */ |
| 74 |
_ThreadIndex* _M_bin_proc; |
| 75 |
|
| 76 |
/** @brief Number of bins to distribute to. */ |
| 77 |
int _M_num_bins; |
| 78 |
|
| 79 |
/** @brief Number of bits needed to address the bins. */ |
| 80 |
int _M_num_bits; |
| 81 |
|
| 82 |
/** @brief Constructor. */ |
| 83 |
_DRandomShufflingGlobalData(_RAIter& __source) |
| 84 |
: _M_source(__source) { } |
| 85 |
}; |
| 86 |
|
| 87 |
/** @brief Local data for a thread participating in |
| 88 |
__gnu_parallel::__parallel_random_shuffle(). |
| 89 |
*/ |
| 90 |
template<typename _RAIter, typename _RandomNumberGenerator> |
| 91 |
struct _DRSSorterPU |
| 92 |
{ |
| 93 |
/** @brief Number of threads participating in total. */ |
| 94 |
int _M_num_threads; |
| 95 |
|
| 96 |
/** @brief Begin index for bins taken care of by this thread. */ |
| 97 |
_BinIndex _M_bins_begin; |
| 98 |
|
| 99 |
/** @brief End index for bins taken care of by this thread. */ |
| 100 |
_BinIndex __bins_end; |
| 101 |
|
| 102 |
/** @brief Random _M_seed for this thread. */ |
| 103 |
uint32_t _M_seed; |
| 104 |
|
| 105 |
/** @brief Pointer to global data. */ |
| 106 |
_DRandomShufflingGlobalData<_RAIter>* _M_sd; |
| 107 |
}; |
| 108 |
|
| 109 |
/** @brief Generate a random number in @c [0,2^__logp). |
| 110 |
* @param __logp Logarithm (basis 2) of the upper range __bound. |
| 111 |
* @param __rng Random number generator to use. |
| 112 |
*/ |
| 113 |
template<typename _RandomNumberGenerator> |
| 114 |
inline int |
| 115 |
__random_number_pow2(int __logp, _RandomNumberGenerator& __rng) |
| 116 |
{ return __rng.__genrand_bits(__logp); } |
| 117 |
|
| 118 |
/** @brief Random shuffle code executed by each thread. |
| 119 |
* @param __pus Array of thread-local data records. */ |
| 120 |
template<typename _RAIter, typename _RandomNumberGenerator> |
| 121 |
void |
| 122 |
__parallel_random_shuffle_drs_pu(_DRSSorterPU<_RAIter, |
| 123 |
_RandomNumberGenerator>* __pus) |
| 124 |
{ |
| 125 |
typedef std::iterator_traits<_RAIter> _TraitsType; |
| 126 |
typedef typename _TraitsType::value_type _ValueType; |
| 127 |
typedef typename _TraitsType::difference_type _DifferenceType; |
| 128 |
|
| 129 |
_ThreadIndex __iam = omp_get_thread_num(); |
| 130 |
_DRSSorterPU<_RAIter, _RandomNumberGenerator>* __d = &__pus[__iam]; |
| 131 |
_DRandomShufflingGlobalData<_RAIter>* __sd = __d->_M_sd; |
| 132 |
|
| 133 |
// Indexing: _M_dist[bin][processor] |
| 134 |
_DifferenceType __length = (__sd->_M_starts[__iam + 1] |
| 135 |
- __sd->_M_starts[__iam]); |
| 136 |
_BinIndex* __oracles = new _BinIndex[__length]; |
| 137 |
_DifferenceType* __dist = new _DifferenceType[__sd->_M_num_bins + 1]; |
| 138 |
_BinIndex* __bin_proc = new _BinIndex[__sd->_M_num_bins]; |
| 139 |
_ValueType** __temporaries = new _ValueType*[__d->_M_num_threads]; |
| 140 |
|
| 141 |
// Compute oracles and count appearances. |
| 142 |
for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b) |
| 143 |
__dist[__b] = 0; |
| 144 |
int __num_bits = __sd->_M_num_bits; |
| 145 |
|
| 146 |
_RandomNumber __rng(__d->_M_seed); |
| 147 |
|
| 148 |
// First main loop. |
| 149 |
for (_DifferenceType __i = 0; __i < __length; ++__i) |
| 150 |
{ |
| 151 |
_BinIndex __oracle = __random_number_pow2(__num_bits, __rng); |
| 152 |
__oracles[__i] = __oracle; |
| 153 |
|
| 154 |
// To allow prefix (partial) sum. |
| 155 |
++(__dist[__oracle + 1]); |
| 156 |
} |
| 157 |
|
| 158 |
for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b) |
| 159 |
__sd->_M_dist[__b][__iam + 1] = __dist[__b]; |
| 160 |
|
| 161 |
# pragma omp barrier |
| 162 |
|
| 163 |
# pragma omp single |
| 164 |
{ |
| 165 |
// Sum up bins, __sd->_M_dist[__s + 1][__d->_M_num_threads] now |
| 166 |
// contains the total number of items in bin __s |
| 167 |
for (_BinIndex __s = 0; __s < __sd->_M_num_bins; ++__s) |
| 168 |
__gnu_sequential::partial_sum(__sd->_M_dist[__s + 1], |
| 169 |
__sd->_M_dist[__s + 1] |
| 170 |
+ __d->_M_num_threads + 1, |
| 171 |
__sd->_M_dist[__s + 1]); |
| 172 |
} |
| 173 |
|
| 174 |
# pragma omp barrier |
| 175 |
|
| 176 |
_SequenceIndex __offset = 0, __global_offset = 0; |
| 177 |
for (_BinIndex __s = 0; __s < __d->_M_bins_begin; ++__s) |
| 178 |
__global_offset += __sd->_M_dist[__s + 1][__d->_M_num_threads]; |
| 179 |
|
| 180 |
# pragma omp barrier |
| 181 |
|
| 182 |
for (_BinIndex __s = __d->_M_bins_begin; __s < __d->__bins_end; ++__s) |
| 183 |
{ |
| 184 |
for (int __t = 0; __t < __d->_M_num_threads + 1; ++__t) |
| 185 |
__sd->_M_dist[__s + 1][__t] += __offset; |
| 186 |
__offset = __sd->_M_dist[__s + 1][__d->_M_num_threads]; |
| 187 |
} |
| 188 |
|
| 189 |
__sd->_M_temporaries[__iam] = static_cast<_ValueType*> |
| 190 |
(::operator new(sizeof(_ValueType) * __offset)); |
| 191 |
|
| 192 |
# pragma omp barrier |
| 193 |
|
| 194 |
// Draw local copies to avoid false sharing. |
| 195 |
for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b) |
| 196 |
__dist[__b] = __sd->_M_dist[__b][__iam]; |
| 197 |
for (_BinIndex __b = 0; __b < __sd->_M_num_bins; ++__b) |
| 198 |
__bin_proc[__b] = __sd->_M_bin_proc[__b]; |
| 199 |
for (_ThreadIndex __t = 0; __t < __d->_M_num_threads; ++__t) |
| 200 |
__temporaries[__t] = __sd->_M_temporaries[__t]; |
| 201 |
|
| 202 |
_RAIter __source = __sd->_M_source; |
| 203 |
_DifferenceType __start = __sd->_M_starts[__iam]; |
| 204 |
|
| 205 |
// Distribute according to oracles, second main loop. |
| 206 |
for (_DifferenceType __i = 0; __i < __length; ++__i) |
| 207 |
{ |
| 208 |
_BinIndex __target_bin = __oracles[__i]; |
| 209 |
_ThreadIndex __target_p = __bin_proc[__target_bin]; |
| 210 |
|
| 211 |
// Last column [__d->_M_num_threads] stays unchanged. |
| 212 |
::new(&(__temporaries[__target_p][__dist[__target_bin + 1]++])) |
| 213 |
_ValueType(*(__source + __i + __start)); |
| 214 |
} |
| 215 |
|
| 216 |
delete[] __oracles; |
| 217 |
delete[] __dist; |
| 218 |
delete[] __bin_proc; |
| 219 |
delete[] __temporaries; |
| 220 |
|
| 221 |
# pragma omp barrier |
| 222 |
|
| 223 |
// Shuffle bins internally. |
| 224 |
for (_BinIndex __b = __d->_M_bins_begin; __b < __d->__bins_end; ++__b) |
| 225 |
{ |
| 226 |
_ValueType* __begin = |
| 227 |
(__sd->_M_temporaries[__iam] |
| 228 |
+ (__b == __d->_M_bins_begin |
| 229 |
? 0 : __sd->_M_dist[__b][__d->_M_num_threads])), |
| 230 |
*__end = (__sd->_M_temporaries[__iam] |
| 231 |
+ __sd->_M_dist[__b + 1][__d->_M_num_threads]); |
| 232 |
|
| 233 |
__sequential_random_shuffle(__begin, __end, __rng); |
| 234 |
std::copy(__begin, __end, __sd->_M_source + __global_offset |
| 235 |
+ (__b == __d->_M_bins_begin |
| 236 |
? 0 : __sd->_M_dist[__b][__d->_M_num_threads])); |
| 237 |
} |
| 238 |
|
| 239 |
for (_SequenceIndex __i = 0; __i < __offset; ++__i) |
| 240 |
__sd->_M_temporaries[__iam][__i].~_ValueType(); |
| 241 |
::operator delete(__sd->_M_temporaries[__iam]); |
| 242 |
} |
| 243 |
|
| 244 |
/** @brief Round up to the next greater power of 2. |
| 245 |
* @param __x _Integer to round up */ |
| 246 |
template<typename _Tp> |
| 247 |
_Tp |
| 248 |
__round_up_to_pow2(_Tp __x) |
| 249 |
{ |
| 250 |
if (__x <= 1) |
| 251 |
return 1; |
| 252 |
else |
| 253 |
return (_Tp)1 << (__rd_log2(__x - 1) + 1); |
| 254 |
} |
| 255 |
|
| 256 |
/** @brief Main parallel random shuffle step. |
| 257 |
* @param __begin Begin iterator of sequence. |
| 258 |
* @param __end End iterator of sequence. |
| 259 |
* @param __n Length of sequence. |
| 260 |
* @param __num_threads Number of threads to use. |
| 261 |
* @param __rng Random number generator to use. |
| 262 |
*/ |
| 263 |
template<typename _RAIter, typename _RandomNumberGenerator> |
| 264 |
void |
| 265 |
__parallel_random_shuffle_drs(_RAIter __begin, _RAIter __end, |
| 266 |
typename std::iterator_traits |
| 267 |
<_RAIter>::difference_type __n, |
| 268 |
_ThreadIndex __num_threads, |
| 269 |
_RandomNumberGenerator& __rng) |
| 270 |
{ |
| 271 |
typedef std::iterator_traits<_RAIter> _TraitsType; |
| 272 |
typedef typename _TraitsType::value_type _ValueType; |
| 273 |
typedef typename _TraitsType::difference_type _DifferenceType; |
| 274 |
|
| 275 |
_GLIBCXX_CALL(__n) |
| 276 |
|
| 277 |
const _Settings& __s = _Settings::get(); |
| 278 |
|
| 279 |
if (__num_threads > __n) |
| 280 |
__num_threads = static_cast<_ThreadIndex>(__n); |
| 281 |
|
| 282 |
_BinIndex __num_bins, __num_bins_cache; |
| 283 |
|
| 284 |
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 |
| 285 |
// Try the L1 cache first. |
| 286 |
|
| 287 |
// Must fit into L1. |
| 288 |
__num_bins_cache = |
| 289 |
std::max<_DifferenceType>(1, __n / (__s.L1_cache_size_lb |
| 290 |
/ sizeof(_ValueType))); |
| 291 |
__num_bins_cache = __round_up_to_pow2(__num_bins_cache); |
| 292 |
|
| 293 |
// No more buckets than TLB entries, power of 2 |
| 294 |
// Power of 2 and at least one element per bin, at most the TLB size. |
| 295 |
__num_bins = std::min<_DifferenceType>(__n, __num_bins_cache); |
| 296 |
|
| 297 |
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB |
| 298 |
// 2 TLB entries needed per bin. |
| 299 |
__num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, __num_bins); |
| 300 |
#endif |
| 301 |
__num_bins = __round_up_to_pow2(__num_bins); |
| 302 |
|
| 303 |
if (__num_bins < __num_bins_cache) |
| 304 |
{ |
| 305 |
#endif |
| 306 |
// Now try the L2 cache |
| 307 |
// Must fit into L2 |
| 308 |
__num_bins_cache = static_cast<_BinIndex> |
| 309 |
(std::max<_DifferenceType>(1, __n / (__s.L2_cache_size |
| 310 |
/ sizeof(_ValueType)))); |
| 311 |
__num_bins_cache = __round_up_to_pow2(__num_bins_cache); |
| 312 |
|
| 313 |
// No more buckets than TLB entries, power of 2. |
| 314 |
__num_bins = static_cast<_BinIndex> |
| 315 |
(std::min(__n, static_cast<_DifferenceType>(__num_bins_cache))); |
| 316 |
// Power of 2 and at least one element per bin, at most the TLB size. |
| 317 |
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB |
| 318 |
// 2 TLB entries needed per bin. |
| 319 |
__num_bins = std::min(static_cast<_DifferenceType>(__s.TLB_size / 2), |
| 320 |
__num_bins); |
| 321 |
#endif |
| 322 |
__num_bins = __round_up_to_pow2(__num_bins); |
| 323 |
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 |
| 324 |
} |
| 325 |
#endif |
| 326 |
|
| 327 |
__num_bins = __round_up_to_pow2( |
| 328 |
std::max<_BinIndex>(__num_threads, __num_bins)); |
| 329 |
|
| 330 |
if (__num_threads <= 1) |
| 331 |
{ |
| 332 |
_RandomNumber __derived_rng( |
| 333 |
__rng(std::numeric_limits<uint32_t>::max())); |
| 334 |
__sequential_random_shuffle(__begin, __end, __derived_rng); |
| 335 |
return; |
| 336 |
} |
| 337 |
|
| 338 |
_DRandomShufflingGlobalData<_RAIter> __sd(__begin); |
| 339 |
_DRSSorterPU<_RAIter, _RandomNumber >* __pus; |
| 340 |
_DifferenceType* __starts; |
| 341 |
|
| 342 |
# pragma omp parallel num_threads(__num_threads) |
| 343 |
{ |
| 344 |
_ThreadIndex __num_threads = omp_get_num_threads(); |
| 345 |
# pragma omp single |
| 346 |
{ |
| 347 |
__pus = new _DRSSorterPU<_RAIter, _RandomNumber>[__num_threads]; |
| 348 |
|
| 349 |
__sd._M_temporaries = new _ValueType*[__num_threads]; |
| 350 |
__sd._M_dist = new _DifferenceType*[__num_bins + 1]; |
| 351 |
__sd._M_bin_proc = new _ThreadIndex[__num_bins]; |
| 352 |
for (_BinIndex __b = 0; __b < __num_bins + 1; ++__b) |
| 353 |
__sd._M_dist[__b] = new _DifferenceType[__num_threads + 1]; |
| 354 |
for (_BinIndex __b = 0; __b < (__num_bins + 1); ++__b) |
| 355 |
{ |
| 356 |
__sd._M_dist[0][0] = 0; |
| 357 |
__sd._M_dist[__b][0] = 0; |
| 358 |
} |
| 359 |
__starts = __sd._M_starts = new _DifferenceType[__num_threads + 1]; |
| 360 |
int __bin_cursor = 0; |
| 361 |
__sd._M_num_bins = __num_bins; |
| 362 |
__sd._M_num_bits = __rd_log2(__num_bins); |
| 363 |
|
| 364 |
_DifferenceType __chunk_length = __n / __num_threads, |
| 365 |
__split = __n % __num_threads, |
| 366 |
__start = 0; |
| 367 |
_DifferenceType __bin_chunk_length = __num_bins / __num_threads, |
| 368 |
__bin_split = __num_bins % __num_threads; |
| 369 |
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) |
| 370 |
{ |
| 371 |
__starts[__i] = __start; |
| 372 |
__start += (__i < __split |
| 373 |
? (__chunk_length + 1) : __chunk_length); |
| 374 |
int __j = __pus[__i]._M_bins_begin = __bin_cursor; |
| 375 |
|
| 376 |
// Range of bins for this processor. |
| 377 |
__bin_cursor += (__i < __bin_split |
| 378 |
? (__bin_chunk_length + 1) |
| 379 |
: __bin_chunk_length); |
| 380 |
__pus[__i].__bins_end = __bin_cursor; |
| 381 |
for (; __j < __bin_cursor; ++__j) |
| 382 |
__sd._M_bin_proc[__j] = __i; |
| 383 |
__pus[__i]._M_num_threads = __num_threads; |
| 384 |
__pus[__i]._M_seed = __rng(std::numeric_limits<uint32_t>::max()); |
| 385 |
__pus[__i]._M_sd = &__sd; |
| 386 |
} |
| 387 |
__starts[__num_threads] = __start; |
| 388 |
} //single |
| 389 |
// Now shuffle in parallel. |
| 390 |
__parallel_random_shuffle_drs_pu(__pus); |
| 391 |
} // parallel |
| 392 |
|
| 393 |
delete[] __starts; |
| 394 |
delete[] __sd._M_bin_proc; |
| 395 |
for (int __s = 0; __s < (__num_bins + 1); ++__s) |
| 396 |
delete[] __sd._M_dist[__s]; |
| 397 |
delete[] __sd._M_dist; |
| 398 |
delete[] __sd._M_temporaries; |
| 399 |
|
| 400 |
delete[] __pus; |
| 401 |
} |
| 402 |
|
| 403 |
/** @brief Sequential cache-efficient random shuffle. |
| 404 |
* @param __begin Begin iterator of sequence. |
| 405 |
* @param __end End iterator of sequence. |
| 406 |
* @param __rng Random number generator to use. |
| 407 |
*/ |
| 408 |
template<typename _RAIter, typename _RandomNumberGenerator> |
| 409 |
void |
| 410 |
__sequential_random_shuffle(_RAIter __begin, _RAIter __end, |
| 411 |
_RandomNumberGenerator& __rng) |
| 412 |
{ |
| 413 |
typedef std::iterator_traits<_RAIter> _TraitsType; |
| 414 |
typedef typename _TraitsType::value_type _ValueType; |
| 415 |
typedef typename _TraitsType::difference_type _DifferenceType; |
| 416 |
|
| 417 |
_DifferenceType __n = __end - __begin; |
| 418 |
const _Settings& __s = _Settings::get(); |
| 419 |
|
| 420 |
_BinIndex __num_bins, __num_bins_cache; |
| 421 |
|
| 422 |
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 |
| 423 |
// Try the L1 cache first, must fit into L1. |
| 424 |
__num_bins_cache = std::max<_DifferenceType> |
| 425 |
(1, __n / (__s.L1_cache_size_lb / sizeof(_ValueType))); |
| 426 |
__num_bins_cache = __round_up_to_pow2(__num_bins_cache); |
| 427 |
|
| 428 |
// No more buckets than TLB entries, power of 2 |
| 429 |
// Power of 2 and at least one element per bin, at most the TLB size |
| 430 |
__num_bins = std::min(__n, (_DifferenceType)__num_bins_cache); |
| 431 |
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB |
| 432 |
// 2 TLB entries needed per bin |
| 433 |
__num_bins = std::min((_DifferenceType)__s.TLB_size / 2, __num_bins); |
| 434 |
#endif |
| 435 |
__num_bins = __round_up_to_pow2(__num_bins); |
| 436 |
|
| 437 |
if (__num_bins < __num_bins_cache) |
| 438 |
{ |
| 439 |
#endif |
| 440 |
// Now try the L2 cache, must fit into L2. |
| 441 |
__num_bins_cache = static_cast<_BinIndex> |
| 442 |
(std::max<_DifferenceType>(1, __n / (__s.L2_cache_size |
| 443 |
/ sizeof(_ValueType)))); |
| 444 |
__num_bins_cache = __round_up_to_pow2(__num_bins_cache); |
| 445 |
|
| 446 |
// No more buckets than TLB entries, power of 2 |
| 447 |
// Power of 2 and at least one element per bin, at most the TLB size. |
| 448 |
__num_bins = static_cast<_BinIndex> |
| 449 |
(std::min(__n, static_cast<_DifferenceType>(__num_bins_cache))); |
| 450 |
|
| 451 |
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB |
| 452 |
// 2 TLB entries needed per bin |
| 453 |
__num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, __num_bins); |
| 454 |
#endif |
| 455 |
__num_bins = __round_up_to_pow2(__num_bins); |
| 456 |
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 |
| 457 |
} |
| 458 |
#endif |
| 459 |
|
| 460 |
int __num_bits = __rd_log2(__num_bins); |
| 461 |
|
| 462 |
if (__num_bins > 1) |
| 463 |
{ |
| 464 |
_ValueType* __target = |
| 465 |
static_cast<_ValueType*>(::operator new(sizeof(_ValueType) * __n)); |
| 466 |
_BinIndex* __oracles = new _BinIndex[__n]; |
| 467 |
_DifferenceType* __dist0 = new _DifferenceType[__num_bins + 1], |
| 468 |
* __dist1 = new _DifferenceType[__num_bins + 1]; |
| 469 |
|
| 470 |
for (int __b = 0; __b < __num_bins + 1; ++__b) |
| 471 |
__dist0[__b] = 0; |
| 472 |
|
| 473 |
_RandomNumber __bitrng(__rng(0xFFFFFFFF)); |
| 474 |
|
| 475 |
for (_DifferenceType __i = 0; __i < __n; ++__i) |
| 476 |
{ |
| 477 |
_BinIndex __oracle = __random_number_pow2(__num_bits, __bitrng); |
| 478 |
__oracles[__i] = __oracle; |
| 479 |
|
| 480 |
// To allow prefix (partial) sum. |
| 481 |
++(__dist0[__oracle + 1]); |
| 482 |
} |
| 483 |
|
| 484 |
// Sum up bins. |
| 485 |
__gnu_sequential::partial_sum(__dist0, __dist0 + __num_bins + 1, |
| 486 |
__dist0); |
| 487 |
|
| 488 |
for (int __b = 0; __b < __num_bins + 1; ++__b) |
| 489 |
__dist1[__b] = __dist0[__b]; |
| 490 |
|
| 491 |
// Distribute according to oracles. |
| 492 |
for (_DifferenceType __i = 0; __i < __n; ++__i) |
| 493 |
::new(&(__target[(__dist0[__oracles[__i]])++])) |
| 494 |
_ValueType(*(__begin + __i)); |
| 495 |
|
| 496 |
for (int __b = 0; __b < __num_bins; ++__b) |
| 497 |
__sequential_random_shuffle(__target + __dist1[__b], |
| 498 |
__target + __dist1[__b + 1], __rng); |
| 499 |
|
| 500 |
// Copy elements back. |
| 501 |
std::copy(__target, __target + __n, __begin); |
| 502 |
|
| 503 |
delete[] __dist0; |
| 504 |
delete[] __dist1; |
| 505 |
delete[] __oracles; |
| 506 |
|
| 507 |
for (_DifferenceType __i = 0; __i < __n; ++__i) |
| 508 |
__target[__i].~_ValueType(); |
| 509 |
::operator delete(__target); |
| 510 |
} |
| 511 |
else |
| 512 |
__gnu_sequential::random_shuffle(__begin, __end, __rng); |
| 513 |
} |
| 514 |
|
| 515 |
/** @brief Parallel random public call. |
| 516 |
* @param __begin Begin iterator of sequence. |
| 517 |
* @param __end End iterator of sequence. |
| 518 |
* @param __rng Random number generator to use. |
| 519 |
*/ |
| 520 |
template<typename _RAIter, typename _RandomNumberGenerator> |
| 521 |
inline void |
| 522 |
__parallel_random_shuffle(_RAIter __begin, _RAIter __end, |
| 523 |
_RandomNumberGenerator __rng = _RandomNumber()) |
| 524 |
{ |
| 525 |
typedef std::iterator_traits<_RAIter> _TraitsType; |
| 526 |
typedef typename _TraitsType::difference_type _DifferenceType; |
| 527 |
_DifferenceType __n = __end - __begin; |
| 528 |
__parallel_random_shuffle_drs(__begin, __end, __n, |
| 529 |
__get_max_threads(), __rng); |
| 530 |
} |
| 531 |
} |
| 532 |
|
| 533 |
#endif /* _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H */ |