// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
11 #ifndef _PSTL_INTERNAL_OMP_UTIL_H
12 #define _PSTL_INTERNAL_OMP_UTIL_H
23 #include "../parallel_backend_utils.h"
24 #include "../unseq_backend_simd.h"
// Portability "#pragma" definition: MSVC spells the pragma operator
// `__pragma(x)`, everything else uses the standard C++11 `_Pragma("x")`,
// which requires stringizing the argument.  The two definitions must be
// mutually exclusive -- defining both unconditionally is a macro
// redefinition error.
#ifdef _MSC_VER
#    define _PSTL_PRAGMA(x) __pragma(x)
#else
#    define _PSTL_PRAGMA(x) _Pragma(#x)
#endif
36 namespace __omp_backend
39 //------------------------------------------------------------------------
40 // use to cancel execution
41 //------------------------------------------------------------------------
// TODO: Figure out how to make cancellation work.
48 //------------------------------------------------------------------------
50 //------------------------------------------------------------------------
/// RAII temporary storage for __n uninitialized objects of type _Tp.
///
/// Memory is acquired from std::allocator<_Tp> in the constructor and
/// released in the destructor; the class is non-copyable so the storage has
/// exactly one owner.  Conversion to bool reports whether a non-null
/// allocation is held.
template <typename _Tp>
class __buffer
{
    std::allocator<_Tp> __allocator_;
    _Tp* __ptr_;
    const std::size_t __buf_size_;

    // Non-copyable: copying would double-free the storage.
    __buffer(const __buffer&) = delete;
    void
    operator=(const __buffer&) = delete;

  public:
    /// Try to obtain a buffer large enough for __n objects of _Tp.
    __buffer(std::size_t __n) : __allocator_(), __ptr_(__allocator_.allocate(__n)), __buf_size_(__n) {}

    /// True if the buffer was successfully obtained, false otherwise.
    operator bool() const { return __ptr_ != nullptr; }

    /// Pointer to the raw storage.  The memory is uninitialized; the caller
    /// is responsible for constructing/destroying objects in it.
    _Tp*
    __get() const
    {
        return __ptr_;
    }

    /// Release the storage (does not run _Tp destructors).
    ~__buffer() { __allocator_.deallocate(__ptr_, __buf_size_); }
};
// Preliminary size of each chunk: requires further discussion
inline constexpr std::size_t __default_chunk_size = 2048;

// Convenience predicate: a range of at most __default_chunk_size items is
// not worth parallelizing, so run it serially.  Overload for iterators.
template <typename _Iterator, std::enable_if_t<!std::is_integral<_Iterator>::value, bool> = true>
auto
__should_run_serial(_Iterator __first, _Iterator __last) -> bool
{
    using _difference_type = typename std::iterator_traits<_Iterator>::difference_type;
    const auto __item_count = std::distance(__first, __last);
    return __item_count <= static_cast<_difference_type>(__default_chunk_size);
}

// Overload for integral index spaces: the "range" is [__first, __last).
template <typename _Index, std::enable_if_t<std::is_integral<_Index>::value, bool> = true>
auto
__should_run_serial(_Index __first, _Index __last) -> bool
{
    const _Index __item_count = __last - __first;
    return __item_count <= static_cast<_Index>(__default_chunk_size);
}
// Result of partitioning an iteration space, as computed by
// __chunk_partitioner below.
struct __chunk_metrics
{
    std::size_t __n_chunks;         // total number of chunks
    std::size_t __chunk_size;       // size of every chunk except the first
    std::size_t __first_chunk_size; // the first chunk absorbs any leftover items
};
104 // The iteration space partitioner according to __requested_chunk_size
105 template <class _RandomAccessIterator, class _Size = std::size_t>
107 __chunk_partitioner(_RandomAccessIterator __first, _RandomAccessIterator __last,
108 _Size __requested_chunk_size = __default_chunk_size) -> __chunk_metrics
111 * This algorithm improves distribution of elements in chunks by avoiding
112 * small tail chunks. The leftover elements that do not fit neatly into
113 * the chunk size are redistributed to early chunks. This improves
114 * utilization of the processor's prefetch and reduces the number of
118 const _Size __n = __last - __first;
119 _Size __n_chunks = 0;
120 _Size __chunk_size = 0;
121 _Size __first_chunk_size = 0;
122 if (__n < __requested_chunk_size)
125 __first_chunk_size = __n;
127 return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
130 __n_chunks = (__n / __requested_chunk_size) + 1;
131 __chunk_size = __n / __n_chunks;
132 __first_chunk_size = __chunk_size;
133 const _Size __n_leftover_items = __n - (__n_chunks * __chunk_size);
135 if (__n_leftover_items == __chunk_size)
138 return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
140 else if (__n_leftover_items == 0)
142 __first_chunk_size = __chunk_size;
143 return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
146 const _Size __n_extra_items_per_chunk = __n_leftover_items / __n_chunks;
147 const _Size __n_final_leftover_items = __n_leftover_items - (__n_extra_items_per_chunk * __n_chunks);
149 __chunk_size += __n_extra_items_per_chunk;
150 __first_chunk_size = __chunk_size + __n_final_leftover_items;
152 return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
155 template <typename _Iterator, typename _Index, typename _Func>
157 __process_chunk(const __chunk_metrics& __metrics, _Iterator __base, _Index __chunk_index, _Func __f)
159 auto __this_chunk_size = __chunk_index == 0 ? __metrics.__first_chunk_size : __metrics.__chunk_size;
160 auto __index = __chunk_index == 0 ? 0
161 : (__chunk_index * __metrics.__chunk_size) +
162 (__metrics.__first_chunk_size - __metrics.__chunk_size);
163 auto __first = __base + __index;
164 auto __last = __first + __this_chunk_size;
165 __f(__first, __last);
168 } // namespace __omp_backend
169 } // namespace __pstl
171 #endif // _PSTL_INTERNAL_OMP_UTIL_H