| 1 |
// -*- C++ -*- |
| 2 |
|
| 3 |
// Copyright (C) 2007-2021 Free Software Foundation, Inc. |
| 4 |
// |
| 5 |
// This file is part of the GNU ISO C++ Library. This library is free |
| 6 |
// software; you can redistribute it and/or modify it under the terms |
| 7 |
// of the GNU General Public License as published by the Free Software |
| 8 |
// Foundation; either version 3, or (at your option) any later |
| 9 |
// version. |
| 10 |
|
| 11 |
// This library is distributed in the hope that it will be useful, but |
| 12 |
// WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 |
// General Public License for more details. |
| 15 |
|
| 16 |
// Under Section 7 of GPL version 3, you are granted additional |
| 17 |
// permissions described in the GCC Runtime Library Exception, version |
| 18 |
// 3.1, as published by the Free Software Foundation. |
| 19 |
|
| 20 |
// You should have received a copy of the GNU General Public License and |
| 21 |
// a copy of the GCC Runtime Library Exception along with this program; |
| 22 |
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| 23 |
// <http://www.gnu.org/licenses/>. |
| 24 |
|
| 25 |
/** @file parallel/partial_sum.h |
| 26 |
* @brief Parallel implementation of std::partial_sum(), i.e. prefix |
| 27 |
* sums. |
| 28 |
* This file is a GNU parallel extension to the Standard C++ Library. |
| 29 |
*/ |
| 30 |
|
| 31 |
// Written by Johannes Singler. |
| 32 |
|
| 33 |
#ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H |
| 34 |
#define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1 |
| 35 |
|
| 36 |
#include <omp.h> |
| 37 |
#include <new> |
| 38 |
#include <bits/stl_algobase.h> |
| 39 |
#include <parallel/parallel.h> |
| 40 |
#include <parallel/numericfwd.h> |
| 41 |
|
| 42 |
namespace __gnu_parallel |
| 43 |
{ |
| 44 |
// Problem: there is no 0-element given. |
| 45 |
|
| 46 |
/** @brief Base case prefix sum routine. |
| 47 |
* @param __begin Begin iterator of input sequence. |
| 48 |
* @param __end End iterator of input sequence. |
| 49 |
* @param __result Begin iterator of output sequence. |
| 50 |
* @param __bin_op Associative binary function. |
| 51 |
* @param __value Start value. Must be passed since the neutral |
| 52 |
* element is unknown in general. |
| 53 |
* @return End iterator of output sequence. */ |
| 54 |
template<typename _IIter, |
| 55 |
typename _OutputIterator, |
| 56 |
typename _BinaryOperation> |
| 57 |
_OutputIterator |
| 58 |
__parallel_partial_sum_basecase(_IIter __begin, _IIter __end, |
| 59 |
_OutputIterator __result, |
| 60 |
_BinaryOperation __bin_op, |
| 61 |
typename std::iterator_traits <_IIter>::value_type __value) |
| 62 |
{ |
| 63 |
if (__begin == __end) |
| 64 |
return __result; |
| 65 |
|
| 66 |
while (__begin != __end) |
| 67 |
{ |
| 68 |
__value = __bin_op(__value, *__begin); |
| 69 |
*__result = __value; |
| 70 |
++__result; |
| 71 |
++__begin; |
| 72 |
} |
| 73 |
return __result; |
| 74 |
} |
| 75 |
|
| 76 |
/** @brief Parallel partial sum implementation, two-phase approach, |
| 77 |
no recursion. |
| 78 |
* @param __begin Begin iterator of input sequence. |
| 79 |
* @param __end End iterator of input sequence. |
| 80 |
* @param __result Begin iterator of output sequence. |
| 81 |
* @param __bin_op Associative binary function. |
| 82 |
* @param __n Length of sequence. |
| 83 |
* @return End iterator of output sequence. |
| 84 |
*/ |
| 85 |
template<typename _IIter, |
| 86 |
typename _OutputIterator, |
| 87 |
typename _BinaryOperation> |
| 88 |
_OutputIterator |
| 89 |
__parallel_partial_sum_linear(_IIter __begin, _IIter __end, |
| 90 |
_OutputIterator __result, |
| 91 |
_BinaryOperation __bin_op, |
| 92 |
typename std::iterator_traits<_IIter>::difference_type __n) |
| 93 |
{ |
| 94 |
typedef std::iterator_traits<_IIter> _TraitsType; |
| 95 |
typedef typename _TraitsType::value_type _ValueType; |
| 96 |
typedef typename _TraitsType::difference_type _DifferenceType; |
| 97 |
|
| 98 |
if (__begin == __end) |
| 99 |
return __result; |
| 100 |
|
| 101 |
_ThreadIndex __num_threads = |
| 102 |
std::min<_DifferenceType>(__get_max_threads(), __n - 1); |
| 103 |
|
| 104 |
if (__num_threads < 2) |
| 105 |
{ |
| 106 |
*__result = *__begin; |
| 107 |
return __parallel_partial_sum_basecase(__begin + 1, __end, |
| 108 |
__result + 1, __bin_op, |
| 109 |
*__begin); |
| 110 |
} |
| 111 |
|
| 112 |
_DifferenceType* __borders; |
| 113 |
_ValueType* __sums; |
| 114 |
|
| 115 |
const _Settings& __s = _Settings::get(); |
| 116 |
|
| 117 |
# pragma omp parallel num_threads(__num_threads) |
| 118 |
{ |
| 119 |
# pragma omp single |
| 120 |
{ |
| 121 |
__num_threads = omp_get_num_threads(); |
| 122 |
|
| 123 |
__borders = new _DifferenceType[__num_threads + 2]; |
| 124 |
|
| 125 |
if (__s.partial_sum_dilation == 1.0f) |
| 126 |
__equally_split(__n, __num_threads + 1, __borders); |
| 127 |
else |
| 128 |
{ |
| 129 |
_DifferenceType __first_part_length = |
| 130 |
std::max<_DifferenceType>(1, |
| 131 |
__n / (1.0f + __s.partial_sum_dilation * __num_threads)); |
| 132 |
_DifferenceType __chunk_length = |
| 133 |
(__n - __first_part_length) / __num_threads; |
| 134 |
_DifferenceType __borderstart = |
| 135 |
__n - __num_threads * __chunk_length; |
| 136 |
__borders[0] = 0; |
| 137 |
for (_ThreadIndex __i = 1; __i < (__num_threads + 1); ++__i) |
| 138 |
{ |
| 139 |
__borders[__i] = __borderstart; |
| 140 |
__borderstart += __chunk_length; |
| 141 |
} |
| 142 |
__borders[__num_threads + 1] = __n; |
| 143 |
} |
| 144 |
|
| 145 |
__sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType) |
| 146 |
* __num_threads)); |
| 147 |
_OutputIterator __target_end; |
| 148 |
} //single |
| 149 |
|
| 150 |
_ThreadIndex __iam = omp_get_thread_num(); |
| 151 |
if (__iam == 0) |
| 152 |
{ |
| 153 |
*__result = *__begin; |
| 154 |
__parallel_partial_sum_basecase(__begin + 1, |
| 155 |
__begin + __borders[1], |
| 156 |
__result + 1, |
| 157 |
__bin_op, *__begin); |
| 158 |
::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1)); |
| 159 |
} |
| 160 |
else |
| 161 |
{ |
| 162 |
::new(&(__sums[__iam])) |
| 163 |
_ValueType(__gnu_parallel::accumulate( |
| 164 |
__begin + __borders[__iam] + 1, |
| 165 |
__begin + __borders[__iam + 1], |
| 166 |
*(__begin + __borders[__iam]), |
| 167 |
__bin_op, |
| 168 |
__gnu_parallel::sequential_tag())); |
| 169 |
} |
| 170 |
|
| 171 |
# pragma omp barrier |
| 172 |
|
| 173 |
# pragma omp single |
| 174 |
__parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads, |
| 175 |
__sums + 1, __bin_op, __sums[0]); |
| 176 |
|
| 177 |
# pragma omp barrier |
| 178 |
|
| 179 |
// Still same team. |
| 180 |
__parallel_partial_sum_basecase(__begin + __borders[__iam + 1], |
| 181 |
__begin + __borders[__iam + 2], |
| 182 |
__result + __borders[__iam + 1], |
| 183 |
__bin_op, __sums[__iam]); |
| 184 |
} //parallel |
| 185 |
|
| 186 |
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) |
| 187 |
__sums[__i].~_ValueType(); |
| 188 |
::operator delete(__sums); |
| 189 |
|
| 190 |
delete[] __borders; |
| 191 |
|
| 192 |
return __result + __n; |
| 193 |
} |
| 194 |
|
| 195 |
/** @brief Parallel partial sum front-__end. |
| 196 |
* @param __begin Begin iterator of input sequence. |
| 197 |
* @param __end End iterator of input sequence. |
| 198 |
* @param __result Begin iterator of output sequence. |
| 199 |
* @param __bin_op Associative binary function. |
| 200 |
* @return End iterator of output sequence. */ |
| 201 |
template<typename _IIter, |
| 202 |
typename _OutputIterator, |
| 203 |
typename _BinaryOperation> |
| 204 |
_OutputIterator |
| 205 |
__parallel_partial_sum(_IIter __begin, _IIter __end, |
| 206 |
_OutputIterator __result, _BinaryOperation __bin_op) |
| 207 |
{ |
| 208 |
_GLIBCXX_CALL(__begin - __end) |
| 209 |
|
| 210 |
typedef std::iterator_traits<_IIter> _TraitsType; |
| 211 |
typedef typename _TraitsType::value_type _ValueType; |
| 212 |
typedef typename _TraitsType::difference_type _DifferenceType; |
| 213 |
|
| 214 |
_DifferenceType __n = __end - __begin; |
| 215 |
|
| 216 |
switch (_Settings::get().partial_sum_algorithm) |
| 217 |
{ |
| 218 |
case LINEAR: |
| 219 |
// Need an initial offset. |
| 220 |
return __parallel_partial_sum_linear(__begin, __end, __result, |
| 221 |
__bin_op, __n); |
| 222 |
default: |
| 223 |
// Partial_sum algorithm not implemented. |
| 224 |
_GLIBCXX_PARALLEL_ASSERT(0); |
| 225 |
return __result + __n; |
| 226 |
} |
| 227 |
} |
| 228 |
} |
| 229 |
|
| 230 |
#endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */ |