tiny_dnn 1.0.0
A header-only, dependency-free deep learning framework in C++11
backend_tiny.h
/*
 Copyright (c) 2016, Taiga Nomi, Edgar Riba
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 * Redistributions of source code must retain the above copyright
 notice, this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright
 notice, this list of conditions and the following disclaimer in the
 documentation and/or other materials provided with the distribution.
 * Neither the name of the <organization> nor the
 names of its contributors may be used to endorse or promote products
 derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once

#include "tiny_dnn/config.h"
#include "tiny_dnn/core/backend.h"

#include "tiny_dnn/core/kernels/tiny_quantized_conv2d_kernel.h"
#include "tiny_dnn/core/kernels/tiny_deconv2d_kernel.h"
#include "tiny_dnn/core/kernels/tiny_quantized_deconv2d_kernel.h"
#include "tiny_dnn/core/kernels/tiny_deconv2d_back_kernel.h"
#ifdef CNN_USE_GEMMLOWP
#include "tiny_dnn/core/kernels/tiny_quantized_fully_connected_kernel.h"
#endif
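// (the quantized fully-connected kernels wrap the gemmlowp library;
// compile with -DCNN_USE_GEMMLOWP to enable them, otherwise fully_q and
// fully_eq below throw nn_not_implemented_error)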

namespace tiny_dnn {
namespace core {

class tiny_backend : public backend {
 public:
  // context holds solution-dependent parameters;
  // it should be able to hold any type of structure (like boost::any)

  // convolution
  tiny_backend(conv_params* params,
               std::function<void(const tensor_t&)> f1,
               std::function<void(const tensor_t&, tensor_t&)> f2,
               std::function<void(const tensor_t&, const tensor_t&, tensor_t&)> f3,
               conv_layer_worker_specific_storage* ptr)
    : params_c_(params)
    , conv_layer_worker_storage_(ptr)
    , copy_and_pad_input(f1)
    , copy_and_unpad_delta(f2)
    , backward_activation(f3) {}

  // deconvolution
  tiny_backend(deconv_params* params,
               std::function<void(const tensor_t&)> f1,
               std::function<void(const tensor_t&, tensor_t&)> f2,
               std::function<void(const tensor_t&, const tensor_t&, tensor_t&)> f3,
               deconv_layer_worker_specific_storage* ptr)
    : params_d_(params)
    , deconv_layer_worker_storage_(ptr)
    , copy_and_unpad_output(f1)
    , copy_and_pad_delta(f2)
    , backward_activation(f3) {}

  // maxpooling
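  // (a sketch of the mapping arguments, judging from the kernel calls
  // further below: out2in lists, for every output element, the input
  // indices inside its pooling window; in2out gives, for every input
  // element, the output element it feeds)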
  tiny_backend(std::vector<std::vector<serial_size_t>>* out2in,
               std::vector<serial_size_t>* in2out,
               std::function<void(const tensor_t&, const tensor_t&, tensor_t&)> f,
               max_pooling_layer_worker_specific_storage* ptr)
    : max_pooling_layer_worker_storage_(ptr)
    , out2in_(out2in)
    , in2out_(in2out)
    , backward_activation(f) {}

  // fully_connected
  tiny_backend(fully_params* params,
               std::function<void(const tensor_t&, const tensor_t&, tensor_t&)> f)
    : params_f_(params)
    , backward_activation(f) {}

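  // A minimal construction sketch for the fully-connected overload
  // (hypothetical wiring; in tiny_dnn the owning layer normally creates
  // its backend and passes in its own activation-gradient functor):
  //
  //   fully_params params;
  //   core::tiny_backend backend(&params,
  //       [](const tensor_t& out_grad, const tensor_t& out_data,
  //          tensor_t& curr_delta) { /* d(activation)/d(net) */ });
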
  // core math functions

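  // Note: the plain floating-point paths below (conv2d, maxpool, fully)
  // are stubbed out, with their former bodies kept as comments; only the
  // quantized variants are active in this backend.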
  void conv2d(const std::vector<tensor_t*>& in_data,
              std::vector<tensor_t*>& out_data) override {
    /*copy_and_pad_input(*in_data[0]);
    const vec_t& W    = (*in_data[1])[0];
    const vec_t& bias = (*in_data[2])[0];
    tensor_t&    a    = *out_data[1];
    const std::vector<const vec_t*>& in = (*conv_layer_worker_storage_).prev_out_padded_;  // input  // NOLINT

    fill_tensor(a, float_t(0));

    kernels::tiny_conv2d_kernel(*params_c_,
        in, W, bias, a, layer_->parallelize());*/
  }

  // quantized convolution
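  // (slot convention, as read from the body below: in_data = {input,
  // weights, bias}; out_data[1] receives the pre-activation output)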
  void conv2d_q(const std::vector<tensor_t*>& in_data,
                std::vector<tensor_t*>& out_data) override {
    copy_and_pad_input(*in_data[0]);
    const vec_t& W    = (*in_data[1])[0];
    const vec_t& bias = (*in_data[2])[0];
    tensor_t&    a    = *out_data[1];
    const std::vector<const vec_t*>& in = (*conv_layer_worker_storage_).prev_out_padded_;  // input  // NOLINT

    fill_tensor(a, float_t(0));

    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_conv2d_kernel(*params_c_,
          *in[i], W, bias, a[i], layer_->parallelize());
    }
  }

  // efficient quantization without abundant quantization/dequantization
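  // (in addition to the slots above, in_data[3..5] carry the value ranges
  // of input, weights and bias, and out_data[2] receives the output range,
  // so intermediate results can stay in the quantized domain)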
  void conv2d_eq(const std::vector<tensor_t*>& in_data,
                 std::vector<tensor_t*>& out_data) override {
    copy_and_pad_input(*in_data[0]);
    const vec_t&    W    = (*in_data[1])[0];
    const vec_t&    bias = (*in_data[2])[0];
    const tensor_t& in_r = *in_data[3];
    const vec_t&    W_r  = (*in_data[4])[0];
    const vec_t&    b_r  = (*in_data[5])[0];
    tensor_t&       a    = *out_data[1];
    tensor_t&       a_r  = *out_data[2];

    const std::vector<const vec_t*>& in = (*conv_layer_worker_storage_).prev_out_padded_;  // input  // NOLINT

    fill_tensor(a, float_t(0));
    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_conv2d_kernel(*params_c_,
          *in[i], W, bias, in_r[i], W_r, b_r, a[i], a_r[i], layer_->parallelize());
    }
  }

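  // backward convention, as read from the bodies below: in_grad =
  // {prev_delta, dW, db}; out_grad[1] holds the current layer's delta,
  // into which backward_activation first folds the activation derivative
  // (computed from out_grad[0] and out_data[0])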
  void conv2d(const std::vector<tensor_t*>& in_data,
              const std::vector<tensor_t*>& out_data,
              std::vector<tensor_t*>& out_grad,
              std::vector<tensor_t*>& in_grad) override {
    /*conv_layer_worker_specific_storage& cws = (*conv_layer_worker_storage_);

    std::vector<const vec_t*>& prev_out = cws.prev_out_padded_;
    const vec_t& W          = (*in_data[1])[0];
    tensor_t&    dW         = *in_grad[1];
    tensor_t&    db         = *in_grad[2];
    tensor_t&    curr_delta = *out_grad[1];
    tensor_t*    prev_delta = (params_c_->pad_type == padding::same) ?
        &cws.prev_delta_padded_ : in_grad[0];

    assert(W.size() == params_c_->weight.size());
    assert(dW[0].size() == params_c_->weight.size());
    assert(curr_delta[0].size() == layer_->out_shape()[0].size());

    backward_activation(*out_grad[0], *out_data[0], curr_delta);

    fill_tensor(*prev_delta, float_t(0));

    kernels::tiny_conv2d_back_kernel(*params_c_,
        prev_out, W, dW, db, curr_delta, prev_delta);

    if (params_c_->pad_type == padding::same) {
      copy_and_unpad_delta(cws.prev_delta_padded_, *in_grad[0]);
    }*/
  }

  void conv2d_q(const std::vector<tensor_t*>& in_data,
                const std::vector<tensor_t*>& out_data,
                std::vector<tensor_t*>& out_grad,
                std::vector<tensor_t*>& in_grad) override {
    conv_layer_worker_specific_storage& cws = (*conv_layer_worker_storage_);

    std::vector<const vec_t*>& prev_out = cws.prev_out_padded_;
    const vec_t& W          = (*in_data[1])[0];
    tensor_t&    dW         = *in_grad[1];
    tensor_t&    db         = *in_grad[2];
    tensor_t&    curr_delta = *out_grad[1];
    tensor_t*    prev_delta = (params_c_->pad_type == padding::same) ?
        &cws.prev_delta_padded_ : in_grad[0];

    assert(W.size() == params_c_->weight.size());
    assert(dW[0].size() == params_c_->weight.size());
    assert(curr_delta[0].size() == layer_->out_shape()[0].size());

    backward_activation(*out_grad[0], *out_data[0], curr_delta);

    fill_tensor(*prev_delta, float_t(0));

    for (serial_size_t i = 0; i < prev_out.size(); i++) {
      kernels::tiny_quantized_conv2d_back_kernel(*params_c_,
          *prev_out[i], W, dW[i], db[i], curr_delta[i], &(*prev_delta)[i]);
    }

    if (params_c_->pad_type == padding::same) {
      copy_and_unpad_delta(cws.prev_delta_padded_, *in_grad[0]);
    }
  }

  void deconv2d(const std::vector<tensor_t*>& in_data,
                std::vector<tensor_t*>& out_data) override {
    (*deconv_layer_worker_storage_).prev_out_ = in_data[0];
    const vec_t& W    = (*in_data[1])[0];
    const vec_t& bias = (*in_data[2])[0];
    tensor_t&    a    = *out_data[1];
    const tensor_t& in = *in_data[0];  // input

    fill_tensor(a, float_t(0), params_d_->out.size());  // deconv2d-kernel requires padded size buffer

    kernels::tiny_deconv2d_kernel(*params_d_,
        in, W, bias, a, layer_->parallelize());

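    // the kernel wrote into the padded buffer; crop it into worker
    // storage, then copy the unpadded result back into the output tensor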
    copy_and_unpad_output(a);
    a = *(*deconv_layer_worker_storage_).curr_out_unpadded_;
  }

  // quantized deconvolution
  void deconv2d_q(const std::vector<tensor_t*>& in_data,
                  std::vector<tensor_t*>& out_data) override {
    (*deconv_layer_worker_storage_).prev_out_ = in_data[0];
    const tensor_t& in   = *in_data[0];  // input
    const vec_t&    W    = (*in_data[1])[0];
    const vec_t&    bias = (*in_data[2])[0];
    tensor_t&       a    = *out_data[1];

    fill_tensor(a, float_t(0), params_d_->out.size());  // deconv2d-kernel requires padded size buffer

    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_deconv2d_kernel(*params_d_,
          in[i], W, bias, a[i], layer_->parallelize());
    }

    copy_and_unpad_output(a);
    a = *(*deconv_layer_worker_storage_).curr_out_unpadded_;
  }

  // efficient quantization without abundant quantization/dequantization
  void deconv2d_eq(const std::vector<tensor_t*>& in_data,
                   std::vector<tensor_t*>& out_data) override {
    (*deconv_layer_worker_storage_).prev_out_ = in_data[0];
    const tensor_t& in   = *in_data[0];  // input
    const vec_t&    W    = (*in_data[1])[0];
    const vec_t&    bias = (*in_data[2])[0];
    const tensor_t& in_r = *in_data[3];
    const vec_t&    W_r  = (*in_data[4])[0];
    const vec_t&    b_r  = (*in_data[5])[0];
    tensor_t&       a    = *out_data[1];
    tensor_t&       a_r  = *out_data[2];

    fill_tensor(a, float_t(0), params_d_->out.size());  // deconv2d-kernel requires padded size buffer

    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_deconv2d_kernel(*params_d_,
          in[i], W, bias, in_r[i], W_r, b_r, a[i], a_r[i], layer_->parallelize());
    }

    copy_and_unpad_output(a);
    a = *(*deconv_layer_worker_storage_).curr_out_unpadded_;
  }

  void deconv2d(const std::vector<tensor_t*>& in_data,
                const std::vector<tensor_t*>& out_data,
                std::vector<tensor_t*>& out_grad,
                std::vector<tensor_t*>& in_grad) override {
    deconv_layer_worker_specific_storage& cws = (*deconv_layer_worker_storage_);
    if (params_d_->pad_type == padding::same)
      copy_and_pad_delta(cws.curr_delta_padded, *in_grad[0]);

    const tensor_t& prev_out = *(cws.prev_out_);
    const vec_t& W          = (*in_data[1])[0];
    tensor_t&    dW         = *in_grad[1];
    tensor_t&    db         = *in_grad[2];
    tensor_t&    curr_delta = (params_d_->pad_type == padding::same) ?
        cws.curr_delta_padded : *out_grad[1];
    tensor_t*    prev_delta = in_grad[0];

    assert(W.size() == params_d_->weight.size());
    assert(dW[0].size() == params_d_->weight.size());
    assert(curr_delta[0].size() == layer_->out_shape()[0].size());

    backward_activation(*out_grad[0], *out_data[0], curr_delta);

    fill_tensor(*prev_delta, float_t(0));

    kernels::tiny_deconv2d_back_kernel(*params_d_,
        prev_out, W, dW, db, curr_delta, prev_delta);
  }

  void deconv2d_q(const std::vector<tensor_t*>& in_data,
                  const std::vector<tensor_t*>& out_data,
                  std::vector<tensor_t*>& out_grad,
                  std::vector<tensor_t*>& in_grad) override {
    deconv_layer_worker_specific_storage& cws = (*deconv_layer_worker_storage_);
    if (params_d_->pad_type == padding::same)
      copy_and_pad_delta(cws.curr_delta_padded, *in_grad[0]);

    const tensor_t& prev_out = *(cws.prev_out_);
    const vec_t& W          = (*in_data[1])[0];
    tensor_t&    dW         = *in_grad[1];
    tensor_t&    db         = *in_grad[2];
    tensor_t&    curr_delta = (params_d_->pad_type == padding::same) ?
        cws.curr_delta_padded : *out_grad[1];
    tensor_t*    prev_delta = in_grad[0];

    assert(W.size() == params_d_->weight.size());
    assert(dW[0].size() == params_d_->weight.size());
    assert(curr_delta[0].size() == layer_->out_shape()[0].size());

    backward_activation(*out_grad[0], *out_data[0], curr_delta);

    fill_tensor(*prev_delta, float_t(0));

    for (serial_size_t i = 0; i < prev_out.size(); i++) {
      kernels::tiny_quantized_deconv2d_back_kernel(*params_d_,
          prev_out[i], W, dW[i], db[i], curr_delta[i], &(*prev_delta)[i]);
    }
  }

  void maxpool(const std::vector<tensor_t*>& in_data,
               std::vector<tensor_t*>& out_data) override {
    // just to silence unused-member warnings; remove in the future
    if (max_pooling_layer_worker_storage_) {}
    if (out2in_) {}
    if (in2out_) {}

    /*const tensor_t& in = *in_data[0];
    tensor_t&       a  = *out_data[1];
    std::vector<std::vector<serial_size_t>>& max_idx =
        (*max_pooling_layer_worker_storage_).out2inmax_;

    kernels::tiny_maxpool_kernel(in, a,
        max_idx, *out2in_, layer_->parallelize());*/
  }

  void maxpool(const std::vector<tensor_t*>& in_data,
               const std::vector<tensor_t*>& out_data,
               std::vector<tensor_t*>& out_grad,
               std::vector<tensor_t*>& in_grad) override {
    /*tensor_t& prev_delta = *in_grad[0];
    tensor_t& curr_delta = *out_grad[1];
    std::vector<std::vector<serial_size_t>>& max_idx =
        (*max_pooling_layer_worker_storage_).out2inmax_;

    backward_activation(*out_grad[0], *out_data[0], curr_delta);

    kernels::tiny_maxpool_back_kernel(prev_delta, curr_delta,
        max_idx, *in2out_, layer_->parallelize());*/
  }

  void fully(const std::vector<tensor_t*>& in_data,
             std::vector<tensor_t*>& out_data) override {
    /*const tensor_t& in = *in_data[0];
    const vec_t&    W  = (*in_data[1])[0];
    tensor_t&       a  = *out_data[1];

    kernels::tiny_fully_connected_kernel(*params_f_,
        in, W, params_f_->has_bias_ ? (*in_data[2])[0] : vec_t(),
        a, layer_->parallelize());*/
  }

  void fully_q(const std::vector<tensor_t*>& in_data,
               std::vector<tensor_t*>& out_data) override {
#ifdef CNN_USE_GEMMLOWP
    const tensor_t& in = *in_data[0];
    const vec_t&    W  = (*in_data[1])[0];
    tensor_t&       a  = *out_data[1];

    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_fully_connected_kernel(*params_f_,
          in[i], W, params_f_->has_bias_ ? (*in_data[2])[0] : vec_t(),
          a[i], layer_->parallelize());
    }
#else
    throw nn_not_implemented_error("quantized fully op requires the gemmlowp library; please define CNN_USE_GEMMLOWP");
#endif
  }

  void fully_eq(const std::vector<tensor_t*>& in_data,
                std::vector<tensor_t*>& out_data) override {
#ifdef CNN_USE_GEMMLOWP
    const tensor_t& in   = *in_data[0];
    const vec_t&    W    = (*in_data[1])[0];
    vec_t&          b    = (*in_data[2])[0];
    const tensor_t& in_r = *in_data[3];
    const vec_t&    W_r  = (*in_data[4])[0];
    const vec_t&    b_r  = (*in_data[5])[0];
    tensor_t&       a    = *out_data[1];
    tensor_t&       a_r  = *out_data[2];

    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_fully_connected_kernel(*params_f_,
          in[i], W, b, in_r[i], W_r, b_r, a[i], a_r[i], layer_->parallelize());
    }
#else
    throw nn_not_implemented_error("quantized fully op requires the gemmlowp library; please define CNN_USE_GEMMLOWP");
#endif
  }

  void fully(const std::vector<tensor_t*>& in_data,
             const std::vector<tensor_t*>& out_data,
             std::vector<tensor_t*>& out_grad,
             std::vector<tensor_t*>& in_grad) override {
    /*const tensor_t& prev_out = *in_data[0];
    const vec_t& W          = (*in_data[1])[0];
    tensor_t&    dW         = *in_grad[1];
    tensor_t&    db         = *in_grad[2];
    tensor_t&    prev_delta = *in_grad[0];
    tensor_t&    curr_delta = *out_grad[1];

    backward_activation(*out_grad[0], *out_data[0], curr_delta);

    kernels::tiny_fully_connected_back_kernel(*params_f_, prev_out,
        W, dW, prev_delta, curr_delta, db, layer_->parallelize());*/
  }

  void fully_q(const std::vector<tensor_t*>& in_data,
               const std::vector<tensor_t*>& out_data,
               std::vector<tensor_t*>& out_grad,
               std::vector<tensor_t*>& in_grad) override {
#ifdef CNN_USE_GEMMLOWP
    const tensor_t& prev_out = *in_data[0];
    const vec_t& W          = (*in_data[1])[0];
    tensor_t&    dW         = *in_grad[1];
    tensor_t&    db         = *in_grad[2];
    tensor_t&    prev_delta = *in_grad[0];
    tensor_t&    curr_delta = *out_grad[1];

    backward_activation(*out_grad[0], *out_data[0], curr_delta);

    for (serial_size_t i = 0; i < prev_out.size(); i++) {
      kernels::tiny_quantized_fully_connected_back_kernel(*params_f_, prev_out[i],
          W, dW[i], prev_delta[i], curr_delta[i], db[i], layer_->parallelize());
    }
#else
    throw nn_not_implemented_error("quantized fully op requires the gemmlowp library; please define CNN_USE_GEMMLOWP");
#endif
  }

  backend_t type() const override { return default_engine(); }

 private:
  /* Pointers to the layer parameters */
  conv_params*   params_c_;
  deconv_params* params_d_;
  fully_params*  params_f_;

  /* Pointers to the worker storages */
  conv_layer_worker_specific_storage*        conv_layer_worker_storage_;
  deconv_layer_worker_specific_storage*      deconv_layer_worker_storage_;
  max_pooling_layer_worker_specific_storage* max_pooling_layer_worker_storage_;
  std::vector<std::vector<serial_size_t>>*   out2in_;
  std::vector<serial_size_t>*                in2out_;

  /* Pointers to parent class functions */
  std::function<void(const tensor_t&)> copy_and_pad_input;
  std::function<void(const tensor_t&)> copy_and_unpad_output;
  std::function<void(const tensor_t&, tensor_t&)> copy_and_unpad_delta;
  std::function<void(const tensor_t&, tensor_t&)> copy_and_pad_delta;
  std::function<void(const tensor_t&, const tensor_t&, tensor_t&)> backward_activation;
};

}  // namespace core
}  // namespace tiny_dnn