// <barrier> -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.

// This implementation is based on libcxx/include/barrier
//===-- barrier.h --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------===//

/** @file include/barrier
 *  This is a Standard C++ Library header.
 */

#ifndef _GLIBCXX_BARRIER
#define _GLIBCXX_BARRIER 1

#pragma GCC system_header

#if __cplusplus > 201703L
#include <bits/atomic_base.h>
#if __cpp_lib_atomic_wait && __cpp_aligned_new
#include <bits/std_thread.h>
#include <bits/unique_ptr.h>

#include <array>

#define __cpp_lib_barrier 201907L
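
// Illustrative note (not part of the original header): user code can detect
// this facility portably via the feature-test macro defined above, e.g.
//
//   #include <version>
//   #ifdef __cpp_lib_barrier
//   # include <barrier>
//   #endif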

namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

  struct __empty_completion
  {
    _GLIBCXX_ALWAYS_INLINE void
    operator()() noexcept
    { }
  };

  /*

  The default implementation of __tree_barrier is a classic tree barrier.

  It looks different from literature pseudocode for two main reasons:
   1. Threads that call into std::barrier functions do not provide indices,
      so a numbering step is added before the actual barrier algorithm,
      appearing as an (N+1)-th round on top of the N rounds of the tree
      barrier.
   2. A great deal of attention has been paid to avoiding cache-line
      thrashing, by flattening the tree structure into cache-line-sized
      arrays that are indexed in an efficient way.

  */
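
  /*
  A worked trace of the rounds above (illustrative only; this follows the
  code below and is not part of the original documentation): with
  _M_expected == 5 there are (5+1)>>1 == 3 ticket slots per round, and each
  thread scans for a slot to claim starting from a position hashed from its
  thread id.  In round 0, four of the five arrivals pair up on slots: the
  first to claim a slot advances its phase by one (a "half step") and
  returns, its arrival delegated to its partner; the second advances it by
  two (a "full step") and moves on.  Because the count is odd, one arrival
  ends up alone on the last slot and full-steps by itself.  Round 1 then
  sees 3 arrivals on 2 slots, round 2 sees 2 arrivals on 1 slot, and the
  single remaining winner runs the completion function.
  */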

  enum class __barrier_phase_t : unsigned char { };

  template<typename _CompletionF>
    class __tree_barrier
    {
      using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
      using __atomic_phase_const_ref_t
	= std::__atomic_ref<const __barrier_phase_t>;
      static constexpr auto __phase_alignment =
	__atomic_phase_ref_t::required_alignment;

      using __tickets_t = std::array<__barrier_phase_t, 64>;
      struct alignas(64) /* naturally-align the heap state */ __state_t
      {
	alignas(__phase_alignment) __tickets_t __tickets;
      };

      ptrdiff_t _M_expected;
      unique_ptr<__state_t[]> _M_state;
      __atomic_base<ptrdiff_t> _M_expected_adjustment;
      _CompletionF _M_completion;

      alignas(__phase_alignment) __barrier_phase_t _M_phase;

      // Perform one arrival for the current phase.  Returns true for the
      // single thread that wins the final round (the caller must then run
      // the completion and advance _M_phase); returns false for a thread
      // whose arrival was delegated to the partner that full-stepped the
      // shared ticket.
      bool
      _M_arrive(__barrier_phase_t __old_phase, size_t __current)
      {
	const auto __old_phase_val = static_cast<unsigned char>(__old_phase);
	const auto __half_step =
	  static_cast<__barrier_phase_t>(__old_phase_val + 1);
	const auto __full_step =
	  static_cast<__barrier_phase_t>(__old_phase_val + 2);

	size_t __current_expected = _M_expected;
	__current %= ((_M_expected + 1) >> 1);

	for (int __round = 0; ; ++__round)
	  {
	    if (__current_expected <= 1)
	      return true;
	    size_t const __end_node = ((__current_expected + 1) >> 1),
			 __last_node = __end_node - 1;
	    for ( ; ; ++__current)
	      {
		if (__current == __end_node)
		  __current = 0;
		auto __expect = __old_phase;
		__atomic_phase_ref_t __phase(_M_state[__current]
					       .__tickets[__round]);
		if (__current == __last_node && (__current_expected & 1))
		  {
		    if (__phase.compare_exchange_strong(__expect, __full_step,
							memory_order_acq_rel))
		      break;     // I'm 1 in 1, go to next __round
		  }
		else if (__phase.compare_exchange_strong(__expect, __half_step,
							 memory_order_acq_rel))
		  {
		    return false; // I'm 1 in 2, done with arrival
		  }
		else if (__expect == __half_step)
		  {
		    if (__phase.compare_exchange_strong(__expect, __full_step,
							memory_order_acq_rel))
		      break;     // I'm 2 in 2, go to next __round
		  }
	      }
	    __current_expected = __last_node + 1;
	    __current >>= 1;
	  }
      }

    public:
      using arrival_token = __barrier_phase_t;

      static constexpr ptrdiff_t
      max() noexcept
      { return __PTRDIFF_MAX__; }

      __tree_barrier(ptrdiff_t __expected, _CompletionF __completion)
      : _M_expected(__expected), _M_expected_adjustment(0),
	_M_completion(move(__completion)),
	_M_phase(static_cast<__barrier_phase_t>(0))
      {
	size_t const __count = (_M_expected + 1) >> 1;

	_M_state = std::make_unique<__state_t[]>(__count);
      }

      [[nodiscard]] arrival_token
      arrive(ptrdiff_t __update)
      {
	// Callers do not supply indices, so hash the thread id to pick a
	// quasi-random starting slot for the numbering round.
	std::hash<std::thread::id> __hasher;
	size_t __current = __hasher(std::this_thread::get_id());
	__atomic_phase_ref_t __phase(_M_phase);
	const auto __old_phase = __phase.load(memory_order_relaxed);
	const auto __cur = static_cast<unsigned char>(__old_phase);
	for (; __update; --__update)
	  {
	    if (_M_arrive(__old_phase, __current))
	      {
		// This thread won the final round: run the completion,
		// fold in any arrive_and_drop() adjustments, then advance
		// the phase and wake all waiters.
		_M_completion();
		_M_expected += _M_expected_adjustment.load(memory_order_relaxed);
		_M_expected_adjustment.store(0, memory_order_relaxed);
		auto __new_phase = static_cast<__barrier_phase_t>(__cur + 2);
		__phase.store(__new_phase, memory_order_release);
		__phase.notify_all();
	      }
	  }
	return __old_phase;
      }

      void
      wait(arrival_token&& __old_phase) const
      {
	__atomic_phase_const_ref_t __phase(_M_phase);
	auto const __test_fn = [=]
	  {
	    return __phase.load(memory_order_acquire) != __old_phase;
	  };
	std::__atomic_wait_address(&_M_phase, __test_fn);
      }

      void
      arrive_and_drop()
      {
	// Decrement the expected count for subsequent phases; the winner
	// of the current phase applies the adjustment in arrive().
	_M_expected_adjustment.fetch_sub(1, memory_order_relaxed);
	(void)arrive(1);
      }
    };

  template<typename _CompletionF = __empty_completion>
    class barrier
    {
      // Note, we may introduce a "central" barrier algorithm at some
      // point, for more space-constrained targets.
      using __algorithm_t = __tree_barrier<_CompletionF>;
      __algorithm_t _M_b;

    public:
      class arrival_token final
      {
      public:
	arrival_token(arrival_token&&) = default;
	arrival_token& operator=(arrival_token&&) = default;
	~arrival_token() = default;

      private:
	friend class barrier;
	using __token = typename __algorithm_t::arrival_token;
	explicit arrival_token(__token __tok) noexcept : _M_tok(__tok) { }
	__token _M_tok;
      };

      static constexpr ptrdiff_t
      max() noexcept
      { return __algorithm_t::max(); }

      explicit
      barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
      : _M_b(__count, std::move(__completion))
      { }

      barrier(barrier const&) = delete;
      barrier& operator=(barrier const&) = delete;

      [[nodiscard]] arrival_token
      arrive(ptrdiff_t __update = 1)
      { return arrival_token{_M_b.arrive(__update)}; }

      void
      wait(arrival_token&& __phase) const
      { _M_b.wait(std::move(__phase._M_tok)); }

      void
      arrive_and_wait()
      { wait(arrive()); }

      void
      arrive_and_drop()
      { _M_b.arrive_and_drop(); }
    };
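
  /* Example usage (an illustrative sketch, not part of this header):

       #include <barrier>
       #include <cstdio>
       #include <thread>
       #include <vector>

       int main()
       {
	 // The completion function must be nothrow-invocable; it runs
	 // exactly once per phase, after the last of the three arrives.
	 std::barrier sync_point(3, []() noexcept
				    { std::puts("phase complete"); });

	 std::vector<std::jthread> threads;
	 for (int i = 0; i < 3; ++i)
	   threads.emplace_back([&sync_point, i]
	     {
	       std::printf("thread %d: phase 1\n", i);
	       sync_point.arrive_and_wait(); // blocks until all 3 arrive
	       std::printf("thread %d: phase 2\n", i);
	     });
       } // jthreads join on destruction

     Every "phase 1" line prints before "phase complete", which in turn
     prints before any "phase 2" line.
  */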

_GLIBCXX_END_NAMESPACE_VERSION
} // namespace
#endif // __cpp_lib_atomic_wait && __cpp_aligned_new
#endif // __cplusplus > 201703L
#endif // _GLIBCXX_BARRIER