tiny_dnn  1.0.0
A header-only, dependency-free deep learning framework in C++11
parallel_for.h
1 /*
2  Copyright (c) 2016, Taiga Nomi
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are met:
7  * Redistributions of source code must retain the above copyright
8  notice, this list of conditions and the following disclaimer.
9  * Redistributions in binary form must reproduce the above copyright
10  notice, this list of conditions and the following disclaimer in the
11  documentation and/or other materials provided with the distribution.
12  * Neither the name of the <organization> nor the
13  names of its contributors may be used to endorse or promote products
14  derived from this software without specific prior written permission.
15 
16  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
17  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
20  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 #pragma once
28 #include <vector>
29 #include <type_traits>
30 #include <limits>
31 #include <cassert>
32 #include <cstdio>
33 #include <string>
34 #include "aligned_allocator.h"
35 #include "nn_error.h"
36 #include "tiny_dnn/config.h"
37 
38 #ifdef CNN_USE_TBB
39 #ifndef NOMINMAX
40 #define NOMINMAX // tbb includes windows.h in tbb/machine/windows_api.h
41 #endif
42 #include <tbb/tbb.h>
43 #include <tbb/task_group.h>
44 #endif
45 
46 #ifndef CNN_USE_OMP
47 #include <thread>
48 #include <future>
49 #endif
50 
51 namespace tiny_dnn {
52 
53 #ifdef CNN_USE_TBB
54 
55 static tbb::task_scheduler_init tbbScheduler(tbb::task_scheduler_init::automatic);//tbb::task_scheduler_init::deferred);
56 
57 typedef tbb::blocked_range<int> blocked_range;
58 
59 template<typename Func>
60 void parallel_for(int begin, int end, const Func& f, int grainsize) {
61  tbb::parallel_for(blocked_range(begin, end, end - begin > grainsize ? grainsize : 1), f);
62 }
63 template<typename Func>
64 void xparallel_for(int begin, int end, const Func& f) {
65  f(blocked_range(begin, end, 100));
66 }
67 
68 #else
69 
/**
 * Minimal stand-in for tbb::blocked_range<int> used when TBB is disabled.
 *
 * Stores a pair of int indices; loop bodies in this file iterate
 * `for (i = begin(); i < end(); ++i)`, i.e. treat it as half-open.
 */
struct blocked_range {
    using const_iterator = int;

    /// Builds the range from two int indices.
    blocked_range(int begin, int end)
        : begin_(begin),
          end_(end) {}

    /// Builds the range from two size_t indices; both are converted to int.
    blocked_range(size_t begin, size_t end)
        : begin_(static_cast<int>(begin)),
          end_(static_cast<int>(end)) {}

    /// First index of the range.
    const_iterator begin() const { return begin_; }

    /// End index of the range (not visited by the loops in this file).
    const_iterator end() const { return end_; }

private:
    int begin_;
    int end_;
};
82 
83 template<typename Func>
84 void xparallel_for(size_t begin, size_t end, const Func& f) {
85  blocked_range r(begin, end);
86  f(r);
87 }
88 
89 #if defined(CNN_USE_OMP)
90 
91 template<typename Func>
92 void parallel_for(int begin, int end, const Func& f, int /*grainsize*/) {
93  #pragma omp parallel for
94  for (int i=begin; i<end; ++i)
95  f(blocked_range(i,i+1));
96 }
97 
98 #elif defined(CNN_SINGLE_THREAD)
99 
/**
 * Single-thread backend (CNN_SINGLE_THREAD): simply delegates to
 * xparallel_for, so @p f is called once with the whole range.
 *
 * @param begin first index of the range
 * @param end   one past the last index of the range
 * @param f     callable invoked with a blocked_range
 * The trailing grain-size argument is ignored by this backend.
 */
template<typename Func>
void parallel_for(int begin, int end, const Func& f, int /*grainsize*/) {
    const size_t first = static_cast<size_t>(begin);
    const size_t last = static_cast<size_t>(end);
    xparallel_for(first, last, f);
}
104 
105 #else
106 
107 template<typename Func>
108 void parallel_for(int start, int end, const Func &f, int /*grainsize*/) {
109  int nthreads = std::thread::hardware_concurrency();
110  int blockSize = (end - start) / nthreads;
111  if (blockSize*nthreads < end - start)
112  blockSize++;
113 
114  std::vector<std::future<void>> futures;
115 
116  int blockStart = start;
117  int blockEnd = blockStart + blockSize;
118  if (blockEnd > end) blockEnd = end;
119 
120  for (int i = 0; i < nthreads; i++) {
121  futures.push_back(std::move(std::async(std::launch::async, [blockStart, blockEnd, &f] {
122  f(blocked_range(blockStart, blockEnd));
123  })));
124 
125  blockStart += blockSize;
126  blockEnd = blockStart + blockSize;
127  if (blockStart >= end) break;
128  if (blockEnd > end) blockEnd = end;
129  }
130 
131  for (auto &future : futures)
132  future.wait();
133 }
134 
135 #endif
136 
137 #endif // CNN_USE_TBB
138 
/**
 * Tells whether @p value can be represented exactly by type @p T.
 *
 * Two conditions must hold:
 *  - converting to T and back to U yields the original value, and
 *  - the converted value has the same sign as the original.
 *
 * The sign check matters for same-width signed/unsigned pairs, where the
 * round trip alone succeeds even though the value changed (for example a
 * 32-bit unsigned value above INT_MAX converted to int).
 *
 * @tparam T target arithmetic type
 * @tparam U source arithmetic type (deduced)
 * @param value the value to test
 * @return true if @p value is exactly representable as T
 */
template<typename T, typename U>
bool value_representation(U const &value) {
    const T converted = static_cast<T>(value);
    // `> 0` is used instead of `< 0` so unsigned operands do not produce an
    // always-false comparison.
    return static_cast<U>(converted) == value
        && (converted > T(0)) == (value > U(0));
}
143 
144 template<typename T, typename Func>
145 inline
146 void for_(std::true_type, bool parallelize, int begin, T end, Func f, int grainsize = 100){
147  parallelize = parallelize && value_representation<int>(end);
148  parallelize ? parallel_for(begin, static_cast<int>(end), f, grainsize) :
149  xparallel_for(begin, static_cast<int>(end), f);
150 }
151 
/**
 * for_ overload selected when the type of @p end is signed.
 *
 * Uses parallel_for (with @p end converted to int) when @p parallelize is
 * set, and xparallel_for (with @p end passed through unchanged) otherwise.
 *
 * @param parallelize whether the caller asked for parallel execution
 * @param begin       first index of the range
 * @param end         one past the last index of the range
 * @param f           callable invoked with a blocked_range
 * @param grainsize   forwarded to parallel_for
 */
template<typename T, typename Func>
inline
void for_(std::false_type, bool parallelize, int begin, T end, Func f, int grainsize = 100){
    if (parallelize) {
        parallel_for(begin, static_cast<int>(end), f, grainsize);
    } else {
        xparallel_for(begin, end, f);
    }
}
157 
/**
 * Entry point for range loops: runs @p f over [begin, end), in parallel if
 * @p parallelize is set.
 *
 * Dispatches on the signedness of T to one of the tagged for_ overloads.
 * T must be an integral type (enforced at compile time).
 *
 * @param parallelize whether to use the parallel backend
 * @param begin       first index of the range
 * @param end         one past the last index of the range
 * @param f           callable invoked with a blocked_range
 * @param grainsize   forwarded to the selected overload
 */
template<typename T, typename Func>
inline
void for_(bool parallelize, int begin, T end, Func f, int grainsize = 100) {
    static_assert(std::is_integral<T>::value, "end must be integral type");

    // std::true_type for unsigned T, std::false_type otherwise.
    typedef typename std::is_unsigned<T>::type signedness_tag;
    for_(signedness_tag(), parallelize, begin, end, f, grainsize);
}
164 
165 template <typename T, typename Func>
166 void for_i(bool parallelize, T size, Func f, int grainsize = 100)
167 {
168  for_(parallelize, 0, size, [&](const blocked_range& r) {
169 #ifdef CNN_USE_OMP
170 #pragma omp parallel for
171 #endif
172  for (int i = r.begin(); i < r.end(); i++)
173  f(i);
174  }, grainsize);
175 }
176 
/**
 * Convenience overload: same as for_i(true, size, f, grainsize), i.e. the
 * parallel backend is always requested.
 *
 * @param size      number of indices to visit
 * @param f         callable invoked with an int index
 * @param grainsize forwarded unchanged
 */
template <typename T, typename Func>
void for_i(T size, Func f, int grainsize = 100) {
    const bool parallelize = true;
    for_i(parallelize, size, f, grainsize);
}
181 
182 } // namespace tiny_dnn
Definition: parallel_for.h:70