// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
11 #ifndef _PSTL_INTERNAL_OMP_UTIL_H
12 #define _PSTL_INTERNAL_OMP_UTIL_H
23 #include "../parallel_backend_utils.h"
24 #include "../unseq_backend_simd.h"
// Portability "#pragma" definition: MSVC spells the pragma operator
// `__pragma(x)`, everything else uses the standard C++11 `_Pragma("x")`,
// which requires stringizing the argument.  The two definitions must be
// mutually exclusive -- defining both unconditionally is a macro
// redefinition error.
#ifdef _MSC_VER
#    define _PSTL_PRAGMA(x) __pragma(x)
#else
#    define _PSTL_PRAGMA(x) _Pragma(#x)
#endif
36 namespace __omp_backend
39 //------------------------------------------------------------------------
40 // use to cancel execution
41 //------------------------------------------------------------------------
// TODO: Figure out how to make cancellation work.
48 //------------------------------------------------------------------------
50 //------------------------------------------------------------------------
/// RAII temporary storage for __n uninitialized objects of type _Tp.
///
/// Memory is acquired from std::allocator<_Tp> in the constructor and
/// released in the destructor; the class is non-copyable so the storage has
/// exactly one owner.  Conversion to bool reports whether a non-null
/// allocation is held.
template <typename _Tp>
class __buffer
{
    std::allocator<_Tp> __allocator_;
    _Tp* __ptr_;
    const std::size_t __buf_size_;

    // Non-copyable: copying would double-free the storage.
    __buffer(const __buffer&) = delete;
    void
    operator=(const __buffer&) = delete;

  public:
    /// Try to obtain a buffer large enough for __n objects of _Tp.
    __buffer(std::size_t __n) : __allocator_(), __ptr_(__allocator_.allocate(__n)), __buf_size_(__n) {}

    /// True if the buffer was successfully obtained, false otherwise.
    operator bool() const { return __ptr_ != nullptr; }

    /// Pointer to the raw storage.  The memory is uninitialized; the caller
    /// is responsible for constructing/destroying objects in it.
    _Tp*
    __get() const
    {
        return __ptr_;
    }

    /// Release the storage (does not run _Tp destructors).
    ~__buffer() { __allocator_.deallocate(__ptr_, __buf_size_); }
};
// Preliminary size of each chunk: requires further discussion
inline constexpr std::size_t __default_chunk_size = 2048;

// Convenience predicate: a range of at most __default_chunk_size items is
// not worth parallelizing, so run it serially.  Overload for iterators.
template <typename _Iterator, std::enable_if_t<!std::is_integral<_Iterator>::value, bool> = true>
auto
__should_run_serial(_Iterator __first, _Iterator __last) -> bool
{
    using _difference_type = typename std::iterator_traits<_Iterator>::difference_type;
    const auto __item_count = std::distance(__first, __last);
    return __item_count <= static_cast<_difference_type>(__default_chunk_size);
}

// Overload for integral index spaces: the "range" is [__first, __last).
template <typename _Index, std::enable_if_t<std::is_integral<_Index>::value, bool> = true>
auto
__should_run_serial(_Index __first, _Index __last) -> bool
{
    const _Index __item_count = __last - __first;
    return __item_count <= static_cast<_Index>(__default_chunk_size);
}
// Result of partitioning an iteration space, as computed by
// __chunk_partitioner below.
struct __chunk_metrics
{
    std::size_t __n_chunks;         // total number of chunks
    std::size_t __chunk_size;       // size of every chunk except the first
    std::size_t __first_chunk_size; // the first chunk absorbs any leftover items
};
104 // The iteration space partitioner according to __requested_chunk_size
105 template <class _RandomAccessIterator, class _Size = std::size_t>
107 __chunk_partitioner(_RandomAccessIterator __first, _RandomAccessIterator __last,
108 _Size __requested_chunk_size = __default_chunk_size) -> __chunk_metrics
111 * This algorithm improves distribution of elements in chunks by avoiding
112 * small tail chunks. The leftover elements that do not fit neatly into
113 * the chunk size are redistributed to early chunks. This improves
114 * utilization of the processor's prefetch and reduces the number of
118 const _Size __n = __last - __first;
119 _Size __n_chunks = 0;
120 _Size __chunk_size = 0;
121 _Size __first_chunk_size = 0;
122 if (__n < __requested_chunk_size)
125 __first_chunk_size = __n;
127 return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
130 __n_chunks = (__n / __requested_chunk_size) + 1;
131 __chunk_size = __n / __n_chunks;
132 __first_chunk_size = __chunk_size;
133 const _Size __n_leftover_items = __n - (__n_chunks * __chunk_size);
135 if (__n_leftover_items == __chunk_size)
138 return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
140 else if (__n_leftover_items == 0)
142 __first_chunk_size = __chunk_size;
143 return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
146 const _Size __n_extra_items_per_chunk = __n_leftover_items / __n_chunks;
147 const _Size __n_final_leftover_items = __n_leftover_items - (__n_extra_items_per_chunk * __n_chunks);
149 __chunk_size += __n_extra_items_per_chunk;
150 __first_chunk_size = __chunk_size + __n_final_leftover_items;
152 return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
155 template <typename _Iterator, typename _Index, typename _Func>
157 __process_chunk(const __chunk_metrics& __metrics, _Iterator __base, _Index __chunk_index, _Func __f)
159 auto __this_chunk_size = __chunk_index == 0 ? __metrics.__first_chunk_size : __metrics.__chunk_size;
160 auto __index = __chunk_index == 0 ? 0
161 : (__chunk_index * __metrics.__chunk_size) +
162 (__metrics.__first_chunk_size - __metrics.__chunk_size);
163 auto __first = __base + __index;
164 auto __last = __first + __this_chunk_size;
165 __f(__first, __last);
168 } // namespace __omp_backend
169 } // namespace __pstl
171 #endif // _PSTL_INTERNAL_OMP_UTIL_H