KaMPIng 0.1.1
Flexible and (near) zero-overhead C++ bindings for MPI
reproducible_reduce.hpp
#include <array>
#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <numeric>
#include <optional>
#include <type_traits>
#include <utility>
#include <vector>

#include "kamping/communicator.hpp"
#include "kamping/named_parameters_detail/status_parameters.hpp"
#include "kamping/p2p/isend.hpp"
#include "kamping/p2p/recv.hpp"
#include "kamping/plugin/plugin_helpers.hpp"

namespace kamping::plugin {

// Binary Tree Reduce
namespace reproducible_reduce {

/// @brief Encapsulates a single intermediate result (value) and its index.
/// @tparam T Type of the stored value.
template <typename T>
struct MessageBufferEntry {
    /// @brief Global index according to reduction order
    size_t index;
    /// @brief Intermediate value during calculation
    T value;
};

constexpr uint8_t MAX_MESSAGE_LENGTH = 4;
constexpr int MESSAGEBUFFER_MPI_TAG = 0xb586772;

/// @brief Responsible for storing and communicating intermediate results between PEs.
/// @tparam T Type of the stored values.
/// @tparam Communicator Type of the underlying communicator.
template <typename T, typename Communicator>
class MessageBuffer {
    // TODO: how to shorten this result type
    // Type returned by the non-blocking isend() call in send() below, i.e. a non-blocking result
    // owning a library-allocated request out-buffer.
    using ResultType = decltype(std::declval<Communicator const&>().isend(
        send_buf(std::declval<std::vector<MessageBufferEntry<T>>&>()),
        destination(0),
        tag(0),
        request()
    ));

public:
    /// @brief Construct a new message buffer utilizing the given communicator \p comm.
    /// @param comm Underlying communicator used to send the messages.
    MessageBuffer(Communicator const& comm)
        : _entries(),
          _inbox(),
          _target_rank(),
          _outbox(),
          _buffer(),
          _request(nullptr),
          _awaited_numbers(0),
          _sent_messages(0),
          _sent_elements(0),
          _send_buffer_clear(true),
          _comm(comm) {
        _outbox.reserve(MAX_MESSAGE_LENGTH + 1);
        _buffer.reserve(MAX_MESSAGE_LENGTH + 1);
    }

    /// @brief Receive a message from another PE and store its contents.
    ///
    /// @param source_rank Rank of the sender.
    void receive(int const source_rank) {
        _comm.recv(
            recv_buf<resize_to_fit>(_buffer),
            tag(MESSAGEBUFFER_MPI_TAG),
            source(source_rank),
            recv_count(MAX_MESSAGE_LENGTH * sizeof(MessageBufferEntry<T>))
        );

        // Extract values from the message
        for (auto const entry: _buffer) {
            _inbox[entry.index] = entry.value;
        }
    }

    /// @brief Asynchronously send locally stored intermediate results.
    ///
    /// If there are none, no message is dispatched.
    void flush(void) {
        if (!_target_rank.has_value() || _outbox.empty())
            return;

        _request = std::make_unique<ResultType>(send());
        ++_sent_messages;

        _target_rank.reset();
        _send_buffer_clear = false;
    }

    /// @brief Wait until the message dispatched by flush() is actually sent and clear any stored values.
    void wait(void) {
        if (_send_buffer_clear) {
            return;
        }

        _request->wait();
        _outbox.clear();
        _send_buffer_clear = true;
    }

    /// @brief Store an intermediate result inside the message buffer for eventual transmission to its destination.
    ///
    /// Triggers a send if
    /// 1. the target rank of the currently stored values does not coincide with \p target_rank,
    /// 2. the message buffer is already full, or
    /// 3. the message buffer is full after adding \p value.
    ///
    /// @param target_rank Rank of the PE which requires the value for further processing.
    /// @param index Global index of the value being sent.
    /// @param value Actual value that must be sent.
    void put(int const target_rank, size_t const index, T const value) {
        bool const outbox_full = _outbox.size() >= MAX_MESSAGE_LENGTH;
        bool const buffer_addressed_to_different_rank = _target_rank.has_value() && _target_rank != target_rank;
        if (outbox_full || buffer_addressed_to_different_rank) {
            flush();
        }
        wait();

        // We can now overwrite the target rank because either
        // A) it was previously different but flush() has reset it, or
        // B) it already has the same value.
        _target_rank = target_rank;

        KASSERT(_outbox.size() < _outbox.capacity());
        KASSERT(_outbox.capacity() > 0);
        MessageBufferEntry<T> entry{index, value};
        _outbox.push_back(entry);

        if (_outbox.size() >= MAX_MESSAGE_LENGTH) {
            flush();
        }
        ++_sent_elements;
    }

    /// @brief Get the intermediate result with the specified \p index from \p source_rank.
    ///
    /// If the value has been received beforehand, it is immediately returned.
    /// Otherwise the method blocks until the message from \p source_rank containing the value arrives.
    ///
    /// @param source_rank Rank of the PE that holds the desired intermediate result.
    /// @param index Global index of the intermediate result.
    T const get(int const source_rank, size_t const index) {
        auto const entry = _inbox.find(index);
        T value;

        if (entry != _inbox.end()) {
            // If we have the number in our inbox, directly return it.
            value = entry->second;
            _inbox.erase(entry);
        } else {
            // If not, we will wait for a message, but make sure no one is waiting for our results.
            flush();
            wait();
            receive(source_rank);

            auto const new_entry = _inbox.find(index);
            KASSERT(new_entry != _inbox.end());
            value = new_entry->second;
            _inbox.erase(new_entry);
        }

        return value;
    }

private:
    auto send() {
        return _comm.isend(send_buf(_outbox), destination(*_target_rank), tag(MESSAGEBUFFER_MPI_TAG), request());
    }

private:
    std::array<MessageBufferEntry<T>, MAX_MESSAGE_LENGTH> _entries;
    std::map<uint64_t, T> _inbox;
    std::optional<int> _target_rank;
    std::vector<MessageBufferEntry<T>> _outbox;
    std::vector<MessageBufferEntry<T>> _buffer;
    std::unique_ptr<ResultType> _request;
    size_t _awaited_numbers;
    size_t _sent_messages;
    size_t _sent_elements;
    bool _send_buffer_clear;
    Communicator const& _comm;
};

// Helper functions

/// @brief Get the index of the parent of positive index \p i.
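///
/// For example (the values below follow directly from the bit manipulation in the function):
/// @code{.cpp}
/// ( tree_parent(6) == 4 )
/// ( tree_parent(12) == 8 )
/// ( tree_parent(7) == 6 )
/// @endcode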
inline auto tree_parent(size_t const i) {
    KASSERT(i != 0);

    // Clear the least significant set bit
    return i & (i - 1);
}

/// @brief Return the number of indices contained by the subtree with index \p i.
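///
/// For example (values follow from the definition below):
/// @code{.cpp}
/// ( tree_subtree_size(4) == 4 ) // the subtree rooted at index 4 covers indices 4..7
/// ( tree_subtree_size(6) == 2 ) // covers indices 6..7
/// ( tree_subtree_size(5) == 1 ) // odd indices are leaves
/// @endcode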
inline auto tree_subtree_size(size_t const i) {
    auto const largest_child_index{i | (i - 1)};
    return largest_child_index + 1 - i;
}

/// @brief Return the rank of the PE that holds the intermediate result with the specified \p index according to a \p
/// start_indices map.
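///
/// For example, assuming two ranks holding four elements each plus the sentinel entry at the end:
/// @code{.cpp}
/// std::map<size_t, size_t> start_indices{{0, 0}, {4, 1}, {8, 2}};
/// ( tree_rank_from_index_map(start_indices, 3) == 0 )
/// ( tree_rank_from_index_map(start_indices, 4) == 1 )
/// @endcode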
inline auto tree_rank_from_index_map(std::map<size_t, size_t> const& start_indices, size_t const index) {
    // Get an iterator to the first start index that is greater than index
    auto it = start_indices.upper_bound(index);
    KASSERT(it != start_indices.begin());
    --it;

    return kamping::asserting_cast<size_t>(it->second);
}

/// @brief Calculate the indices of intermediate results that must be communicated to other PEs.
///
/// @param region_begin Index of the first element assigned to the local rank.
/// @param region_end Index one past the last element assigned to the local rank.
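///
/// For example, a PE holding the index range [6, 12) must send the intermediate results rooted at
/// indices 6 and 8, since their tree parents (4 and 0, respectively) are located on other PEs:
/// @code{.cpp}
/// ( tree_rank_intersecting_elements(6, 12) == std::vector<size_t>{6, 8} )
/// @endcode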
inline auto tree_rank_intersecting_elements(size_t const region_begin, size_t const region_end) {
    std::vector<size_t> result;

    size_t const region_size = region_end - region_begin;

    if (region_begin == 0 || region_size == 0) {
        return result;
    }

    size_t index{region_begin};
    while (index < region_end) {
        if (index > 0) {
            KASSERT(tree_parent(index) < region_begin);
        }
        result.push_back(index);
        index += tree_subtree_size(index);
    }

    return result;
}

/// @brief Calculate the logarithm to base 2 of the specified \p value.
///
/// Rounds down:
///
/// @code{.cpp}
/// ( log2l(4) == 2 == log2l(5) )
/// @endcode
inline auto log2l(size_t const value) {
    // See https://stackoverflow.com/a/994623
    size_t i = value;
    unsigned int target_value = 0;
    while (i >>= 1)
        ++target_value;

    return target_value;
}

/// @brief Return the number of necessary passes through the array to fully reduce the subtree with the specified \p
/// index.
inline size_t subtree_height(size_t const index) {
    KASSERT(index != 0);

    return log2l(tree_subtree_size(index));
}

/// @brief Return the number of necessary passes through the array to fully reduce a tree with \p global_size elements.
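///
/// For example:
/// @code{.cpp}
/// ( tree_height(16) == 4 )
/// ( tree_height(17) == 5 )
/// @endcode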
inline size_t tree_height(size_t const global_size) {
    if (global_size == 0) {
        return 0U;
    }

    unsigned int result = log2l(global_size);

    if (global_size > (1UL << result)) {
        return result + 1;
    } else {
        return result;
    }
}

/// @brief Communicator that can reproducibly reduce an array of a fixed size according to a binary tree scheme.
///
/// @tparam T Type of the elements that are to be reduced.
/// @tparam Communicator Type of the underlying communicator.
template <typename T, typename Communicator>
class ReproducibleCommunicator {
public:
    /// @brief Create a new reproducible communicator.
    /// @param comm Underlying communicator to transport messages.
    /// @param start_indices Map from global array indices onto ranks on which they are held. Must have no gaps, start
    /// at index 0 and contain a sentinel element at the end.
    /// @param region_begin Index of the first element that is held locally.
    /// @param region_size Number of elements assigned to the current rank.
    ReproducibleCommunicator(
        Communicator const& comm,
        std::map<size_t, size_t> const start_indices,
        size_t const region_begin,
        size_t const region_size
    )
        : _start_indices{start_indices},
          _region_begin{region_begin},
          _region_size{region_size},
          _region_end{region_begin + region_size},
          _global_size{(--start_indices.end())->first},
          _origin_rank{_global_size == 0 ? 0UL : tree_rank_from_index_map(_start_indices, 0)},
          _comm{comm},
          _rank_intersecting_elements(tree_rank_intersecting_elements(_region_begin, _region_end)),
          _reduce_buffer(_region_size),
          _message_buffer(_comm) {}

    /// @brief Reproducible reduction according to the pre-initialized scheme.
    /// The following parameters are required:
    /// - \ref kamping::send_buf() containing the local elements that are reduced. This buffer has to match the size
    /// specified during creation of the \ref ReproducibleCommunicator.
    /// - \ref kamping::op() wrapping the operation to apply to the input.
    ///
    /// @param args All required arguments as described above.
    /// @return Final reduction result obtained by applying the operation in a fixed order to all input elements across
    /// PEs.
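    ///
    /// Example (sketch; \c repro_comm is assumed to be a ReproducibleCommunicator obtained from
    /// make_reproducible_comm(), \c region_size the local element count passed at its creation, and the
    /// reduction uses the builtin plus operation wrapper):
    /// @code{.cpp}
    /// std::vector<double> local_chunk(region_size, 1.0); // the locally held elements
    /// double const sum =
    ///     repro_comm.reproducible_reduce(kamping::send_buf(local_chunk), kamping::op(kamping::ops::plus<>{}));
    /// @endcode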
    template <typename... Args>
    T const reproducible_reduce(Args... args) {
        // get send buffer
        auto&& send_buf =
            internal::select_parameter_type<internal::ParameterType::send_buf>(args...).construct_buffer_or_rebind();
        using send_value_type = typename std::remove_reference_t<decltype(send_buf)>::value_type;

        KASSERT(
            send_buf.size() == _region_size,
            "send_buf must have the same size as specified during creation of the reproducible communicator. "
                << "Is " << send_buf.size() << " but should be " << _region_size << " on rank " << _comm.rank()
        );

        static_assert(
            std::is_same_v<std::remove_const_t<send_value_type>, T>,
            "send type must be equal to the type used during Communicator initialization"
        );

        // Get the operation used for the reduction. The signature of the provided function is checked while building.
        auto& operation_param = internal::select_parameter_type<internal::ParameterType::op>(args...);
        // If you want to understand the syntax of the following line, ignore the "template " ;-)
        auto operation = operation_param.template build_operation<send_value_type>();

        return _perform_reduce(send_buf.data(), operation);
    }

private:
    template <typename Func>
    T const _perform_reduce(T const* buffer, Func&& op) {
        for (auto const index: _rank_intersecting_elements) {
            if (tree_subtree_size(index) > 16) {
                // If we are about to do a considerable amount of work, make sure
                // the send buffer is empty so no one is waiting for our results.
                _message_buffer.flush();
            }
            auto const target_rank = tree_rank_from_index_map(_start_indices, tree_parent(index));
            T const value = _perform_reduce(index, buffer, op);
            _message_buffer.put(asserting_cast<int>(target_rank), index, value);
        }

        _message_buffer.flush();
        _message_buffer.wait();

        T result;
        if (_comm.rank() == _origin_rank) {
            result = _perform_reduce(0, buffer, op);
        }

        _comm.bcast_single(kamping::send_recv_buf(result), kamping::root(_origin_rank));

        return result;
    }

    template <typename Func>
    T const _perform_reduce(size_t const index, T const* buffer, Func&& op) {
        if ((index & 1) == 1) {
            return buffer[index - _region_begin];
        }

        size_t const max_x =
            (index == 0) ? _global_size - 1 : std::min(_global_size - 1, index + tree_subtree_size(index) - 1);
        size_t const max_y = (index == 0) ? tree_height(_global_size) : subtree_height(index);

        KASSERT(max_y < 64, "Unreasonably large max_y");

        size_t const largest_local_index = std::min(max_x, _region_end - 1);
        auto const n_local_elements = largest_local_index + 1 - index;

        size_t elements_in_buffer = n_local_elements;
        T* destination_buffer = _reduce_buffer.data();
        T const* source_buffer = static_cast<T const*>(buffer + (index - _region_begin));

        for (size_t y = 1; y <= max_y; y += 1) {
            size_t const stride = 1UL << (y - 1);
            size_t elements_written = 0;

            for (size_t x = 0; x + 2 <= elements_in_buffer; x += 2) {
                T const a = source_buffer[x];
                T const b = source_buffer[x + 1];
                destination_buffer[elements_written++] = op(a, b);
            }
            size_t const remaining_elements = elements_in_buffer - 2 * elements_written;
            KASSERT(remaining_elements <= 1);

            if (remaining_elements == 1) {
                auto const indexA = index + (elements_in_buffer - 1) * stride;
                auto const indexB = indexA + stride;

                T const elementA = source_buffer[elements_in_buffer - 1];
                if (indexB > max_x) {
                    // This element is the last because the subtree ends here
                    destination_buffer[elements_written++] = elementA;
                } else {
                    auto const source_rank = tree_rank_from_index_map(_start_indices, indexB);
                    T elementB = _message_buffer.get(asserting_cast<int>(source_rank), indexB);
                    destination_buffer[elements_written++] = op(elementA, elementB);
                }
            }

            // After the first iteration, read only from the accumulation buffer
            source_buffer = destination_buffer;
            elements_in_buffer = elements_written;
        }

        KASSERT(elements_in_buffer == 1);
        return destination_buffer[0];
    }

    std::map<size_t, size_t> const _start_indices;
    size_t const _region_begin, _region_size, _region_end, _global_size;
    size_t const _origin_rank;
    Communicator const& _comm;
    std::vector<size_t> const _rank_intersecting_elements;
    std::vector<T> _reduce_buffer;
    MessageBuffer<T, Communicator> _message_buffer;
}; // class ReproducibleCommunicator
} // namespace reproducible_reduce

/// @brief Reproducible reduction of distributed arrays.
///
/// To make a reduction operation reproducible independent of communicator size and operation associativity, the
/// computation order must be fixed. We assign a global index to each element and let a binary tree dictate the
/// computation as seen in the figure below:
///
/// \image html tree_reduction.svg "Reduction of 16 elements distributed over 4 PEs"
///
///
/// The ordering of array elements need not follow the rank order of the PEs.
/// We represent the distribution of array elements as a list of send_counts and displacements for each rank.
/// For the example above, send_counts would be `{4, 4, 4, 4}` since each rank
/// keeps four elements, and the displacements would be `{8, 4, 0, 12}`, since
/// the first element of rank 0 has index 8, the first element of rank 1 has
/// index 4, and so on.
///
/// More background on reproducible reduction is provided
/// [here](https://cme.h-its.org/exelixis/pubs/bachelorChristop.pdf).
///
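/// A minimal usage sketch on four ranks, matching the distribution above (illustrative only; it assumes a
/// communicator created with this plugin and the builtin plus operation wrapper):
///
/// @code{.cpp}
/// kamping::Communicator<std::vector, kamping::plugin::ReproducibleReducePlugin> comm;
///
/// // Every rank holds four elements; rank 0 starts at global index 8, rank 1 at 4, rank 2 at 0, rank 3 at 12.
/// std::vector<int> counts{4, 4, 4, 4};
/// std::vector<int> displs{8, 4, 0, 12};
///
/// auto repro_comm = comm.make_reproducible_comm<double>(
///     kamping::send_counts(counts),
///     kamping::recv_displs(displs)
/// );
///
/// std::vector<double> local_data(4, 1.0); // the four locally held elements
/// double const result = repro_comm.reproducible_reduce(
///     kamping::send_buf(local_data),
///     kamping::op(kamping::ops::plus<>{})
/// );
/// @endcode
///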
471template <typename Comm, template <typename...> typename DefaultContainerType>
473 : public kamping::plugin::PluginBase<Comm, DefaultContainerType, ReproducibleReducePlugin> {
public:
    /// @brief Create a communicator with a fixed distribution of a global array that can perform reductions in the same
    /// reduction order.
    ///
    /// The following parameters are required:
    /// - \ref kamping::send_counts() containing the number of elements each rank holds locally.
    /// - \ref kamping::recv_displs() containing the displacement (a.k.a. starting index) for each rank.
    ///
    /// For further details, see the documentation of the \ref ReproducibleReducePlugin.
    ///
    /// Note that the reduce operation sends messages with the tag `0xb586772`.
    /// During the reduce, no messages shall be sent on the underlying
    /// communicator with this tag to avoid interference and potential
    /// deadlocks.
    ///
    /// @tparam T Type of the elements that are to be reduced.
    /// @param args All required arguments as specified above.
    /// @return A \ref reproducible_reduce::ReproducibleCommunicator
    template <typename T, typename... Args>
    auto make_reproducible_comm(Args... args) {
        using namespace kamping;

        KAMPING_CHECK_PARAMETERS(
            Args,
            KAMPING_REQUIRED_PARAMETERS(send_counts, recv_displs),
            KAMPING_OPTIONAL_PARAMETERS()
        );

        using default_recv_displs_type = decltype(recv_displs_out(alloc_new<DefaultContainerType<int>>));
        auto&& recv_displs =
            internal::select_parameter_type_or_default<internal::ParameterType::recv_displs, default_recv_displs_type>(
                std::tuple(),
                args...
            )
                .construct_buffer_or_rebind();
        using recv_displs_type = typename std::remove_reference_t<decltype(recv_displs)>::value_type;
        static_assert(std::is_same_v<std::remove_const_t<recv_displs_type>, int>, "Recv displs must be of type int");

        auto const& send_counts = internal::select_parameter_type<internal::ParameterType::send_counts>(args...)
                                      .construct_buffer_or_rebind();
        // This is the value type (i.e. of the underlying container)
        // using sendcounts_type = typename std::remove_reference_t<decltype(send_counts)>::value_type;

        auto comm = this->to_communicator();
        KASSERT(send_counts.size() == comm.size(), "send_counts must be of same size as communicator");
        KASSERT(recv_displs.size() == comm.size(), "recv_displs must be of same size as communicator");

        auto const global_array_length = static_cast<size_t>(
            std::reduce(send_counts.data(), send_counts.data() + send_counts.size(), 0, std::plus<>())
        );

        // Assert distribution is the same on all ranks
        for (auto i = 0U; i < send_counts.size(); ++i) {
            KASSERT(
                comm.is_same_on_all_ranks(send_counts.data()[i]),
                "send_counts value for rank " << i << " is not uniform across the cluster",
                assert::light_communication
            );
            KASSERT(
                comm.is_same_on_all_ranks(recv_displs.data()[i]),
                "recv_displs value for rank " << i << " is not uniform across the cluster",
                assert::light_communication
            );
        }

        KASSERT(global_array_length > 0, "The array must not be empty");

        // Construct index map which maps global array indices to PEs
        std::map<size_t, size_t> start_indices;
        for (size_t p = 0; p < comm.size(); ++p) {
            KASSERT(send_counts.data()[p] >= 0, "send_count for rank " << p << " is negative");
            KASSERT(recv_displs.data()[p] >= 0, "displacement for rank " << p << " is negative");

            if (send_counts.data()[p] == 0) {
                continue;
            }

            start_indices[asserting_cast<size_t>(recv_displs.data()[p])] = p;
        }
        start_indices[global_array_length] = comm.size(); // sentinel element

        KASSERT(start_indices.begin()->first >= 0UL, "recv_displs must not contain negative displacements");
        KASSERT(start_indices.begin()->first == 0UL, "recv_displs must have entry for index 0");

        // Verify correctness of index map
        for (auto it = start_indices.begin(); it != start_indices.end(); ++it) {
            auto const next = std::next(it);
            if (next == start_indices.end())
                break;

            auto const rank = it->second;
            auto const region_start = it->first;
            auto const region_end = region_start + asserting_cast<size_t>(send_counts.data()[rank]);
            auto const next_rank = next->second;
            auto const next_region_start = next->first;

            KASSERT(
                region_end == next_region_start,
                "Region of rank " << rank << " ends at index " << region_end << ", but next region of rank "
                                  << next_rank << " starts at index " << next_region_start
            );
        }

        return reproducible_reduce::ReproducibleCommunicator<T, Comm>(
            this->to_communicator(),
            start_indices,
            asserting_cast<size_t>(recv_displs.data()[comm.rank()]),
            asserting_cast<size_t>(send_counts.data()[comm.rank()])
        );
    }
};
} // namespace kamping::plugin