| 1 |
// -*- C++ -*- |
| 2 |
|
| 3 |
// Copyright (C) 2007-2021 Free Software Foundation, Inc. |
| 4 |
// |
| 5 |
// This file is part of the GNU ISO C++ Library. This library is free |
| 6 |
// software; you can redistribute it and/or modify it under the terms |
| 7 |
// of the GNU General Public License as published by the Free Software |
| 8 |
// Foundation; either version 3, or (at your option) any later |
| 9 |
// version. |
| 10 |
|
| 11 |
// This library is distributed in the hope that it will be useful, but |
| 12 |
// WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 |
// General Public License for more details. |
| 15 |
|
| 16 |
// Under Section 7 of GPL version 3, you are granted additional |
| 17 |
// permissions described in the GCC Runtime Library Exception, version |
| 18 |
// 3.1, as published by the Free Software Foundation. |
| 19 |
|
| 20 |
// You should have received a copy of the GNU General Public License and |
| 21 |
// a copy of the GCC Runtime Library Exception along with this program; |
| 22 |
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| 23 |
// <http://www.gnu.org/licenses/>. |
| 24 |
|
| 25 |
/** @file parallel/partition.h |
| 26 |
* @brief Parallel implementation of std::partition(), |
| 27 |
* std::nth_element(), and std::partial_sort(). |
| 28 |
* This file is a GNU parallel extension to the Standard C++ Library. |
| 29 |
*/ |
| 30 |
|
| 31 |
// Written by Johannes Singler and Felix Putze. |
| 32 |
|
| 33 |
#ifndef _GLIBCXX_PARALLEL_PARTITION_H |
| 34 |
#define _GLIBCXX_PARALLEL_PARTITION_H 1 |
| 35 |
|
| 36 |
#include <parallel/basic_iterator.h> |
| 37 |
#include <parallel/sort.h> |
| 38 |
#include <parallel/random_number.h> |
| 39 |
#include <bits/stl_algo.h> |
| 40 |
#include <parallel/parallel.h> |
| 41 |
|
| 42 |
/** @brief Decide whether to declare certain variables volatile. */ |
| 43 |
#define _GLIBCXX_VOLATILE volatile |
| 44 |
|
| 45 |
namespace __gnu_parallel |
| 46 |
{ |
| 47 |
/** @brief Parallel implementation of std::partition. |
| 48 |
* @param __begin Begin iterator of input sequence to split. |
| 49 |
* @param __end End iterator of input sequence to split. |
| 50 |
* @param __pred Partition predicate, possibly including some kind |
| 51 |
* of pivot. |
| 52 |
* @param __num_threads Maximum number of threads to use for this task. |
| 53 |
* @return Number of elements not fulfilling the predicate. */ |
| 54 |
template<typename _RAIter, typename _Predicate> |
| 55 |
typename std::iterator_traits<_RAIter>::difference_type |
| 56 |
__parallel_partition(_RAIter __begin, _RAIter __end, |
| 57 |
_Predicate __pred, _ThreadIndex __num_threads) |
| 58 |
{ |
| 59 |
typedef std::iterator_traits<_RAIter> _TraitsType; |
| 60 |
typedef typename _TraitsType::value_type _ValueType; |
| 61 |
typedef typename _TraitsType::difference_type _DifferenceType; |
| 62 |
|
| 63 |
_DifferenceType __n = __end - __begin; |
| 64 |
|
| 65 |
_GLIBCXX_CALL(__n) |
| 66 |
|
| 67 |
const _Settings& __s = _Settings::get(); |
| 68 |
|
| 69 |
// shared |
| 70 |
_GLIBCXX_VOLATILE _DifferenceType __left = 0, __right = __n - 1, |
| 71 |
__dist = __n, |
| 72 |
__leftover_left, __leftover_right, |
| 73 |
__leftnew, __rightnew; |
| 74 |
|
| 75 |
// just 0 or 1, but int to allow atomic operations |
| 76 |
int* __reserved_left = 0, * __reserved_right = 0; |
| 77 |
|
| 78 |
_DifferenceType __chunk_size = __s.partition_chunk_size; |
| 79 |
|
| 80 |
//at least two chunks per thread |
| 81 |
if (__dist >= 2 * __num_threads * __chunk_size) |
| 82 |
# pragma omp parallel num_threads(__num_threads) |
| 83 |
{ |
| 84 |
# pragma omp single |
| 85 |
{ |
| 86 |
__num_threads = omp_get_num_threads(); |
| 87 |
__reserved_left = new int[__num_threads]; |
| 88 |
__reserved_right = new int[__num_threads]; |
| 89 |
|
| 90 |
if (__s.partition_chunk_share > 0.0) |
| 91 |
__chunk_size = std::max<_DifferenceType> |
| 92 |
(__s.partition_chunk_size, (double)__n |
| 93 |
* __s.partition_chunk_share / (double)__num_threads); |
| 94 |
else |
| 95 |
__chunk_size = __s.partition_chunk_size; |
| 96 |
} |
| 97 |
|
| 98 |
while (__dist >= 2 * __num_threads * __chunk_size) |
| 99 |
{ |
| 100 |
# pragma omp single |
| 101 |
{ |
| 102 |
_DifferenceType __num_chunks = __dist / __chunk_size; |
| 103 |
|
| 104 |
for (_ThreadIndex __r = 0; __r < __num_threads; ++__r) |
| 105 |
{ |
| 106 |
__reserved_left [__r] = 0; // false |
| 107 |
__reserved_right[__r] = 0; // false |
| 108 |
} |
| 109 |
__leftover_left = 0; |
| 110 |
__leftover_right = 0; |
| 111 |
} //implicit barrier |
| 112 |
|
| 113 |
// Private. |
| 114 |
_DifferenceType __thread_left, __thread_left_border, |
| 115 |
__thread_right, __thread_right_border; |
| 116 |
|
| 117 |
__thread_left = __left + 1; |
| 118 |
// Just to satisfy the condition below. |
| 119 |
__thread_left_border = __thread_left - 1; |
| 120 |
|
| 121 |
__thread_right = __n - 1; |
| 122 |
// Just to satisfy the condition below. |
| 123 |
__thread_right_border = __thread_right + 1; |
| 124 |
|
| 125 |
bool __iam_finished = false; |
| 126 |
while (!__iam_finished) |
| 127 |
{ |
| 128 |
if (__thread_left > __thread_left_border) |
| 129 |
{ |
| 130 |
_DifferenceType __former_dist = |
| 131 |
__fetch_and_add(&__dist, -__chunk_size); |
| 132 |
if (__former_dist < __chunk_size) |
| 133 |
{ |
| 134 |
__fetch_and_add(&__dist, __chunk_size); |
| 135 |
__iam_finished = true; |
| 136 |
break; |
| 137 |
} |
| 138 |
else |
| 139 |
{ |
| 140 |
__thread_left = |
| 141 |
__fetch_and_add(&__left, __chunk_size); |
| 142 |
__thread_left_border = |
| 143 |
__thread_left + (__chunk_size - 1); |
| 144 |
} |
| 145 |
} |
| 146 |
|
| 147 |
if (__thread_right < __thread_right_border) |
| 148 |
{ |
| 149 |
_DifferenceType __former_dist = |
| 150 |
__fetch_and_add(&__dist, -__chunk_size); |
| 151 |
if (__former_dist < __chunk_size) |
| 152 |
{ |
| 153 |
__fetch_and_add(&__dist, __chunk_size); |
| 154 |
__iam_finished = true; |
| 155 |
break; |
| 156 |
} |
| 157 |
else |
| 158 |
{ |
| 159 |
__thread_right = |
| 160 |
__fetch_and_add(&__right, -__chunk_size); |
| 161 |
__thread_right_border = |
| 162 |
__thread_right - (__chunk_size - 1); |
| 163 |
} |
| 164 |
} |
| 165 |
|
| 166 |
// Swap as usual. |
| 167 |
while (__thread_left < __thread_right) |
| 168 |
{ |
| 169 |
while (__pred(__begin[__thread_left]) |
| 170 |
&& __thread_left <= __thread_left_border) |
| 171 |
++__thread_left; |
| 172 |
while (!__pred(__begin[__thread_right]) |
| 173 |
&& __thread_right >= __thread_right_border) |
| 174 |
--__thread_right; |
| 175 |
|
| 176 |
if (__thread_left > __thread_left_border |
| 177 |
|| __thread_right < __thread_right_border) |
| 178 |
// Fetch new chunk(__s). |
| 179 |
break; |
| 180 |
|
| 181 |
std::iter_swap(__begin + __thread_left, |
| 182 |
__begin + __thread_right); |
| 183 |
++__thread_left; |
| 184 |
--__thread_right; |
| 185 |
} |
| 186 |
} |
| 187 |
|
| 188 |
// Now swap the leftover chunks to the right places. |
| 189 |
if (__thread_left <= __thread_left_border) |
| 190 |
# pragma omp atomic |
| 191 |
++__leftover_left; |
| 192 |
if (__thread_right >= __thread_right_border) |
| 193 |
# pragma omp atomic |
| 194 |
++__leftover_right; |
| 195 |
|
| 196 |
# pragma omp barrier |
| 197 |
|
| 198 |
_DifferenceType |
| 199 |
__leftold = __left, |
| 200 |
__leftnew = __left - __leftover_left * __chunk_size, |
| 201 |
__rightold = __right, |
| 202 |
__rightnew = __right + __leftover_right * __chunk_size; |
| 203 |
|
| 204 |
// <=> __thread_left_border + (__chunk_size - 1) >= __leftnew |
| 205 |
if (__thread_left <= __thread_left_border |
| 206 |
&& __thread_left_border >= __leftnew) |
| 207 |
{ |
| 208 |
// Chunk already in place, reserve spot. |
| 209 |
__reserved_left[(__left - (__thread_left_border + 1)) |
| 210 |
/ __chunk_size] = 1; |
| 211 |
} |
| 212 |
|
| 213 |
// <=> __thread_right_border - (__chunk_size - 1) <= __rightnew |
| 214 |
if (__thread_right >= __thread_right_border |
| 215 |
&& __thread_right_border <= __rightnew) |
| 216 |
{ |
| 217 |
// Chunk already in place, reserve spot. |
| 218 |
__reserved_right[((__thread_right_border - 1) - __right) |
| 219 |
/ __chunk_size] = 1; |
| 220 |
} |
| 221 |
|
| 222 |
# pragma omp barrier |
| 223 |
|
| 224 |
if (__thread_left <= __thread_left_border |
| 225 |
&& __thread_left_border < __leftnew) |
| 226 |
{ |
| 227 |
// Find spot and swap. |
| 228 |
_DifferenceType __swapstart = -1; |
| 229 |
for (int __r = 0; __r < __leftover_left; ++__r) |
| 230 |
if (__reserved_left[__r] == 0 |
| 231 |
&& __compare_and_swap(&(__reserved_left[__r]), 0, 1)) |
| 232 |
{ |
| 233 |
__swapstart = __leftold - (__r + 1) * __chunk_size; |
| 234 |
break; |
| 235 |
} |
| 236 |
|
| 237 |
#if _GLIBCXX_PARALLEL_ASSERTIONS |
| 238 |
_GLIBCXX_PARALLEL_ASSERT(__swapstart != -1); |
| 239 |
#endif |
| 240 |
|
| 241 |
std::swap_ranges(__begin + __thread_left_border |
| 242 |
- (__chunk_size - 1), |
| 243 |
__begin + __thread_left_border + 1, |
| 244 |
__begin + __swapstart); |
| 245 |
} |
| 246 |
|
| 247 |
if (__thread_right >= __thread_right_border |
| 248 |
&& __thread_right_border > __rightnew) |
| 249 |
{ |
| 250 |
// Find spot and swap |
| 251 |
_DifferenceType __swapstart = -1; |
| 252 |
for (int __r = 0; __r < __leftover_right; ++__r) |
| 253 |
if (__reserved_right[__r] == 0 |
| 254 |
&& __compare_and_swap(&(__reserved_right[__r]), 0, 1)) |
| 255 |
{ |
| 256 |
__swapstart = __rightold + __r * __chunk_size + 1; |
| 257 |
break; |
| 258 |
} |
| 259 |
|
| 260 |
#if _GLIBCXX_PARALLEL_ASSERTIONS |
| 261 |
_GLIBCXX_PARALLEL_ASSERT(__swapstart != -1); |
| 262 |
#endif |
| 263 |
|
| 264 |
std::swap_ranges(__begin + __thread_right_border, |
| 265 |
__begin + __thread_right_border |
| 266 |
+ __chunk_size, __begin + __swapstart); |
| 267 |
} |
| 268 |
#if _GLIBCXX_PARALLEL_ASSERTIONS |
| 269 |
# pragma omp barrier |
| 270 |
|
| 271 |
# pragma omp single |
| 272 |
{ |
| 273 |
for (_DifferenceType __r = 0; __r < __leftover_left; ++__r) |
| 274 |
_GLIBCXX_PARALLEL_ASSERT(__reserved_left[__r] == 1); |
| 275 |
for (_DifferenceType __r = 0; __r < __leftover_right; ++__r) |
| 276 |
_GLIBCXX_PARALLEL_ASSERT(__reserved_right[__r] == 1); |
| 277 |
} |
| 278 |
#endif |
| 279 |
|
| 280 |
__left = __leftnew; |
| 281 |
__right = __rightnew; |
| 282 |
__dist = __right - __left + 1; |
| 283 |
} |
| 284 |
|
| 285 |
# pragma omp flush(__left, __right) |
| 286 |
} // end "recursion" //parallel |
| 287 |
|
| 288 |
_DifferenceType __final_left = __left, __final_right = __right; |
| 289 |
|
| 290 |
while (__final_left < __final_right) |
| 291 |
{ |
| 292 |
// Go right until key is geq than pivot. |
| 293 |
while (__pred(__begin[__final_left]) |
| 294 |
&& __final_left < __final_right) |
| 295 |
++__final_left; |
| 296 |
|
| 297 |
// Go left until key is less than pivot. |
| 298 |
while (!__pred(__begin[__final_right]) |
| 299 |
&& __final_left < __final_right) |
| 300 |
--__final_right; |
| 301 |
|
| 302 |
if (__final_left == __final_right) |
| 303 |
break; |
| 304 |
std::iter_swap(__begin + __final_left, __begin + __final_right); |
| 305 |
++__final_left; |
| 306 |
--__final_right; |
| 307 |
} |
| 308 |
|
| 309 |
// All elements on the left side are < piv, all elements on the |
| 310 |
// right are >= piv |
| 311 |
delete[] __reserved_left; |
| 312 |
delete[] __reserved_right; |
| 313 |
|
| 314 |
// Element "between" __final_left and __final_right might not have |
| 315 |
// been regarded yet |
| 316 |
if (__final_left < __n && !__pred(__begin[__final_left])) |
| 317 |
// Really swapped. |
| 318 |
return __final_left; |
| 319 |
else |
| 320 |
return __final_left + 1; |
| 321 |
} |
| 322 |
|
| 323 |
/** |
| 324 |
* @brief Parallel implementation of std::nth_element(). |
| 325 |
* @param __begin Begin iterator of input sequence. |
| 326 |
* @param __nth _Iterator of element that must be in position afterwards. |
| 327 |
* @param __end End iterator of input sequence. |
| 328 |
* @param __comp Comparator. |
| 329 |
*/ |
| 330 |
template<typename _RAIter, typename _Compare> |
| 331 |
void |
| 332 |
__parallel_nth_element(_RAIter __begin, _RAIter __nth, |
| 333 |
_RAIter __end, _Compare __comp) |
| 334 |
{ |
| 335 |
typedef std::iterator_traits<_RAIter> _TraitsType; |
| 336 |
typedef typename _TraitsType::value_type _ValueType; |
| 337 |
typedef typename _TraitsType::difference_type _DifferenceType; |
| 338 |
|
| 339 |
_GLIBCXX_CALL(__end - __begin) |
| 340 |
|
| 341 |
_RAIter __split; |
| 342 |
_RandomNumber __rng; |
| 343 |
|
| 344 |
const _Settings& __s = _Settings::get(); |
| 345 |
_DifferenceType __minimum_length = std::max<_DifferenceType>(2, |
| 346 |
std::max(__s.nth_element_minimal_n, __s.partition_minimal_n)); |
| 347 |
|
| 348 |
// Break if input range to small. |
| 349 |
while (static_cast<_SequenceIndex>(__end - __begin) >= __minimum_length) |
| 350 |
{ |
| 351 |
_DifferenceType __n = __end - __begin; |
| 352 |
|
| 353 |
_RAIter __pivot_pos = __begin + __rng(__n); |
| 354 |
|
| 355 |
// Swap __pivot_pos value to end. |
| 356 |
if (__pivot_pos != (__end - 1)) |
| 357 |
std::iter_swap(__pivot_pos, __end - 1); |
| 358 |
__pivot_pos = __end - 1; |
| 359 |
|
| 360 |
// _Compare must have first_value_type, second_value_type, |
| 361 |
// result_type |
| 362 |
// _Compare == |
| 363 |
// __gnu_parallel::_Lexicographic<S, int, |
| 364 |
// __gnu_parallel::_Less<S, S> > |
| 365 |
// __pivot_pos == std::pair<S, int>* |
| 366 |
__gnu_parallel::__binder2nd<_Compare, _ValueType, _ValueType, bool> |
| 367 |
__pred(__comp, *__pivot_pos); |
| 368 |
|
| 369 |
// Divide, leave pivot unchanged in last place. |
| 370 |
_RAIter __split_pos1, __split_pos2; |
| 371 |
__split_pos1 = __begin + __parallel_partition(__begin, __end - 1, |
| 372 |
__pred, |
| 373 |
__get_max_threads()); |
| 374 |
|
| 375 |
// Left side: < __pivot_pos; __right side: >= __pivot_pos |
| 376 |
|
| 377 |
// Swap pivot back to middle. |
| 378 |
if (__split_pos1 != __pivot_pos) |
| 379 |
std::iter_swap(__split_pos1, __pivot_pos); |
| 380 |
__pivot_pos = __split_pos1; |
| 381 |
|
| 382 |
// In case all elements are equal, __split_pos1 == 0 |
| 383 |
if ((__split_pos1 + 1 - __begin) < (__n >> 7) |
| 384 |
|| (__end - __split_pos1) < (__n >> 7)) |
| 385 |
{ |
| 386 |
// Very unequal split, one part smaller than one 128th |
| 387 |
// elements not strictly larger than the pivot. |
| 388 |
__gnu_parallel::__unary_negate<__gnu_parallel:: |
| 389 |
__binder1st<_Compare, _ValueType, |
| 390 |
_ValueType, bool>, _ValueType> |
| 391 |
__pred(__gnu_parallel::__binder1st<_Compare, _ValueType, |
| 392 |
_ValueType, bool>(__comp, *__pivot_pos)); |
| 393 |
|
| 394 |
// Find other end of pivot-equal range. |
| 395 |
__split_pos2 = __gnu_sequential::partition(__split_pos1 + 1, |
| 396 |
__end, __pred); |
| 397 |
} |
| 398 |
else |
| 399 |
// Only skip the pivot. |
| 400 |
__split_pos2 = __split_pos1 + 1; |
| 401 |
|
| 402 |
// Compare iterators. |
| 403 |
if (__split_pos2 <= __nth) |
| 404 |
__begin = __split_pos2; |
| 405 |
else if (__nth < __split_pos1) |
| 406 |
__end = __split_pos1; |
| 407 |
else |
| 408 |
break; |
| 409 |
} |
| 410 |
|
| 411 |
// Only at most _Settings::partition_minimal_n __elements __left. |
| 412 |
__gnu_sequential::nth_element(__begin, __nth, __end, __comp); |
| 413 |
} |
| 414 |
|
| 415 |
/** @brief Parallel implementation of std::partial_sort(). |
| 416 |
* @param __begin Begin iterator of input sequence. |
| 417 |
* @param __middle Sort until this position. |
| 418 |
* @param __end End iterator of input sequence. |
| 419 |
* @param __comp Comparator. */ |
| 420 |
template<typename _RAIter, typename _Compare> |
| 421 |
void |
| 422 |
__parallel_partial_sort(_RAIter __begin, |
| 423 |
_RAIter __middle, |
| 424 |
_RAIter __end, _Compare __comp) |
| 425 |
{ |
| 426 |
__parallel_nth_element(__begin, __middle, __end, __comp); |
| 427 |
std::sort(__begin, __middle, __comp); |
| 428 |
} |
| 429 |
|
| 430 |
} //namespace __gnu_parallel |
| 431 |
|
| 432 |
#undef _GLIBCXX_VOLATILE |
| 433 |
|
| 434 |
#endif /* _GLIBCXX_PARALLEL_PARTITION_H */ |