29 #include <type_traits>
34 #include "aligned_allocator.h"
36 #include "tiny_dnn/config.h"
43 #include <tbb/task_group.h>
55 static tbb::task_scheduler_init tbbScheduler(tbb::task_scheduler_init::automatic);
57 typedef tbb::blocked_range<int> blocked_range;
59 template<
typename Func>
60 void parallel_for(
int begin,
int end,
const Func& f,
int grainsize) {
61 tbb::parallel_for(blocked_range(begin, end, end - begin > grainsize ? grainsize : 1), f);
63 template<
typename Func>
64 void xparallel_for(
int begin,
int end,
const Func& f) {
65 f(blocked_range(begin, end, 100));
// Type returned by begin()/end(): a plain int index.
using const_iterator = int;
73 blocked_range(
int begin,
int end) : begin_(begin), end_(end) {}
74 blocked_range(
size_t begin,
size_t end) : begin_(
static_cast<int>(begin)), end_(
static_cast<int>(end)) {}
76 const_iterator begin()
const {
return begin_; }
77 const_iterator end()
const {
return end_; }
// xparallel_for overload for the non-TBB build: takes the range bounds as
// size_t and the callable f by const reference.
// NOTE(review): the statements between the opening brace and the next visible
// line are missing from this excerpt, so how begin/end/f are used cannot be
// confirmed from here -- presumably f is called once on the whole range, as in
// the TBB variant; verify against the full file.
83 template<
typename Func>
84 void xparallel_for(
size_t begin,
size_t end,
const Func& f) {
89 #if defined(CNN_USE_OMP)
91 template<
typename Func>
92 void parallel_for(
int begin,
int end,
const Func& f,
int ) {
93 #pragma omp parallel for
94 for (
int i=begin; i<end; ++i)
95 f(blocked_range(i,i+1));
98 #elif defined(CNN_SINGLE_THREAD)
// Single-thread variant of parallel_for: converts both bounds to size_t and
// forwards them, together with f, to xparallel_for. The fourth argument
// (grain size) is accepted for interface compatibility and ignored.
template <typename Func>
void parallel_for(int begin, int end, const Func& f, int /*grainsize*/) {
    const size_t first = static_cast<size_t>(begin);
    const size_t last  = static_cast<size_t>(end);
    xparallel_for(first, last, f);
}
107 template<
typename Func>
108 void parallel_for(
int start,
int end,
const Func &f,
int ) {
109 int nthreads = std::thread::hardware_concurrency();
110 int blockSize = (end - start) / nthreads;
111 if (blockSize*nthreads < end - start)
114 std::vector<std::future<void>> futures;
116 int blockStart = start;
117 int blockEnd = blockStart + blockSize;
118 if (blockEnd > end) blockEnd = end;
120 for (
int i = 0; i < nthreads; i++) {
121 futures.push_back(std::move(std::async(std::launch::async, [blockStart, blockEnd, &f] {
122 f(blocked_range(blockStart, blockEnd));
125 blockStart += blockSize;
126 blockEnd = blockStart + blockSize;
127 if (blockStart >= end)
break;
128 if (blockEnd > end) blockEnd = end;
131 for (
auto &future : futures)
// Reports whether `value` (of type U) can be represented exactly as a T.
//
// Two conditions must hold:
//   1. converting to T and back to U reproduces the original value, and
//   2. the converted value has the same sign as the original.
// The second check is required because a round trip alone accepts conversions
// that merely reinterpret the bits: e.g. unsigned 4000000000 -> int -> unsigned
// round-trips, as does -1 -> unsigned -> int, although neither value is
// representable in the target type.
//
// T : target arithmetic type
// U : source arithmetic type (deduced from `value`)
// Returns true when `value` survives the conversion to T unchanged.
template <typename T, typename U>
bool value_representation(U const &value) {
    const T converted = static_cast<T>(value);
    if (static_cast<U>(converted) != value)
        return false;
    // Sign agreement; for unsigned types the comparison with zero is simply false.
    return (converted < T()) == (value < U());
}
144 template<
typename T,
typename Func>
146 void for_(std::true_type,
bool parallelize,
int begin, T end, Func f,
int grainsize = 100){
147 parallelize = parallelize && value_representation<int>(end);
148 parallelize ? parallel_for(begin,
static_cast<int>(end), f, grainsize) :
149 xparallel_for(begin, static_cast<int>(end), f);
// for_ overload selected (via the std::false_type tag) when T is not unsigned.
//
// With parallelize set, the range goes to parallel_for with `end` cast to int;
// otherwise it goes to xparallel_for with `end` passed through unchanged.
template <typename T, typename Func>
void for_(std::false_type, bool parallelize, int begin, T end, Func f, int grainsize = 100) {
    if (parallelize) {
        parallel_for(begin, static_cast<int>(end), f, grainsize);
        return;
    }
    xparallel_for(begin, end, f);
}
// Entry point of the for_ family: rejects non-integral `end` types at compile
// time, then dispatches on the signedness of T to one of the tagged overloads.
//
// parallelize : request parallel execution
// begin, end  : loop bounds
// f           : loop body, forwarded to the tagged overload
// grainsize   : forwarded to the tagged overload (default 100)
template <typename T, typename Func>
void for_(bool parallelize, int begin, T end, Func f, int grainsize = 100) {
    static_assert(std::is_integral<T>::value, "end must be integral type");
    typedef typename std::is_unsigned<T>::type unsigned_tag;
    for_(unsigned_tag(), parallelize, begin, end, f, grainsize);
}
// Index-based loop helper: calls for_(parallelize, 0, size, <lambda>, ...),
// where the lambda receives each sub-range r and walks the indices from
// r.begin() up to (not including) r.end().
// NOTE(review): this excerpt is missing several lines of the definition -- the
// loop body, the end of the lambda, the remaining for_ arguments and any
// preprocessor guard around the OpenMP pragma. Presumably the loop calls f(i)
// and grainsize is forwarded to for_; confirm against the full file.
165 template <
typename T,
typename Func>
166 void for_i(
bool parallelize, T size, Func f,
int grainsize = 100)
168 for_(parallelize, 0, size, [&](
const blocked_range& r) {
170 #pragma omp parallel for
// Visits every index of the sub-range handed to the lambda.
172 for (
int i = r.begin(); i < r.end(); i++)
// Convenience overload of for_i that always requests parallel execution:
// forwards to the four-argument for_i with parallelize = true.
template <typename T, typename Func>
void for_i(T size, Func f, int grainsize = 100) {
    const bool parallelize = true;
    for_i(parallelize, size, f, grainsize);
}
Definition: parallel_for.h:70