#include "tiny_dnn/config.h"
#include "tiny_dnn/core/backend.h"

#include "tiny_dnn/core/kernels/tiny_quantized_conv2d_kernel.h"
#include "tiny_dnn/core/kernels/tiny_deconv2d_kernel.h"
#include "tiny_dnn/core/kernels/tiny_quantized_deconv2d_kernel.h"
#include "tiny_dnn/core/kernels/tiny_deconv2d_back_kernel.h"

#ifdef CNN_USE_GEMMLOWP
#include "tiny_dnn/core/kernels/tiny_quantized_fully_connected_kernel.h"
#endif  // CNN_USE_GEMMLOWP
  // convolution
  tiny_backend(conv_params* params,
               std::function<void(const tensor_t&)> f1,
               std::function<void(const tensor_t&, tensor_t&)> f2,
               std::function<void(const tensor_t&,
                                  const tensor_t&, tensor_t&)> f3,
               conv_layer_worker_specific_storage* ptr)
    : params_c_(params)
    , conv_layer_worker_storage_(ptr)
    , copy_and_pad_input(f1)
    , copy_and_unpad_delta(f2)
    , backward_activation(f3) {}
  // deconvolution
  tiny_backend(deconv_params* params,
               std::function<void(const tensor_t&)> f1,
               std::function<void(const tensor_t&, tensor_t&)> f2,
               std::function<void(const tensor_t&,
                                  const tensor_t&, tensor_t&)> f3,
               deconv_layer_worker_specific_storage* ptr)
    : params_d_(params)
    , deconv_layer_worker_storage_(ptr)
    , copy_and_unpad_output(f1)
    , copy_and_pad_delta(f2)
    , backward_activation(f3) {}
  // maxpooling
  tiny_backend(std::vector<std::vector<serial_size_t>>* out2in,
               std::vector<serial_size_t>* in2out,
               std::function<void(const tensor_t&,
                                  const tensor_t&, tensor_t&)> f,
               max_pooling_layer_worker_specific_storage* ptr)
    : max_pooling_layer_worker_storage_(ptr)
    , out2in_(out2in)
    , in2out_(in2out)
    , backward_activation(f) {}
  // fully connected
  tiny_backend(fully_params* params,
               std::function<void(const tensor_t&,
                                  const tensor_t&, tensor_t&)> f)
    : params_f_(params)
    , backward_activation(f) {}
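
  // Usage sketch (hypothetical, not part of this header): a convolutional
  // layer would typically construct this backend by binding its own padding
  // helpers and activation gradient to the callbacks above, e.g.
  //
  //   auto backend = std::make_shared<core::tiny_backend>(
  //     &params_,                                               // conv_params
  //     [this](const tensor_t& in) { copy_and_pad_input(in); },
  //     [this](const tensor_t& delta, tensor_t& dst) {
  //       copy_and_unpad_delta(delta, dst);
  //     },
  //     [this](const tensor_t& df, const tensor_t& out, tensor_t& dx) {
  //       backward_activation(df, out, dx);
  //     },
  //     &cws_);                                                 // worker storage
  //
  // The names params_, cws_, and the layer-side helper functions are
  // illustrative assumptions about the owning layer, not definitions made here.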
  void conv2d(const std::vector<tensor_t*>& in_data,
              std::vector<tensor_t*>& out_data) override {
    // ...
  }
  // quantized convolution forward pass
  void conv2d_q(const std::vector<tensor_t*>& in_data,
                std::vector<tensor_t*>& out_data) override {
    copy_and_pad_input(*in_data[0]);
    const vec_t& W    = (*in_data[1])[0];
    const vec_t& bias = (*in_data[2])[0];
    tensor_t&    a    = *out_data[1];
    const std::vector<const vec_t*>& in =
      (*conv_layer_worker_storage_).prev_out_padded_;

    fill_tensor(a, float_t(0));

    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_conv2d_kernel(*params_c_,
        *in[i], W, bias, a[i], layer_->parallelize());
    }
  }
  // quantized convolution forward pass taking explicit range tensors
  void conv2d_eq(const std::vector<tensor_t*>& in_data,
                 std::vector<tensor_t*>& out_data) override {
    copy_and_pad_input(*in_data[0]);
    const vec_t& W       = (*in_data[1])[0];
    const vec_t& bias    = (*in_data[2])[0];
    const tensor_t& in_r = *in_data[3];
    const vec_t& W_r     = (*in_data[4])[0];
    const vec_t& b_r     = (*in_data[5])[0];
    tensor_t& a   = *out_data[1];
    tensor_t& a_r = *out_data[2];

    const std::vector<const vec_t*>& in =
      (*conv_layer_worker_storage_).prev_out_padded_;

    fill_tensor(a, float_t(0));
    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_conv2d_kernel(*params_c_,
        *in[i], W, bias, in_r[i], W_r, b_r, a[i], a_r[i], layer_->parallelize());
    }
  }
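
  // Data slot convention used by the forward passes above: in_data[0] is the
  // layer input, in_data[1] the flattened weights, in_data[2] the bias, and
  // out_data[1] receives the accumulated result `a` (the pre-activation
  // values, as suggested by the separate backward_activation callback). The
  // "_eq" variants additionally read in_data[3..5] and write out_data[2];
  // judging by the "_r" suffix these appear to carry per-tensor quantization
  // ranges, but that is an interpretation of the naming rather than something
  // stated in this file.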
  void conv2d(const std::vector<tensor_t*>& in_data,
              const std::vector<tensor_t*>& out_data,
              std::vector<tensor_t*>& out_grad,
              std::vector<tensor_t*>& in_grad) override {
    // ...
  }
  // quantized convolution backward pass
  void conv2d_q(const std::vector<tensor_t*>& in_data,
                const std::vector<tensor_t*>& out_data,
                std::vector<tensor_t*>& out_grad,
                std::vector<tensor_t*>& in_grad) override {
    auto& cws = *conv_layer_worker_storage_;

    std::vector<const vec_t*>& prev_out = cws.prev_out_padded_;
    const vec_t& W = (*in_data[1])[0];
    tensor_t& dW = *in_grad[1];
    tensor_t& db = *in_grad[2];
    tensor_t& curr_delta = *out_grad[1];
    tensor_t* prev_delta = (params_c_->pad_type == padding::same) ?
                           &cws.prev_delta_padded_ : in_grad[0];

    assert(W.size() == params_c_->weight.size());
    assert(dW[0].size() == params_c_->weight.size());
    assert(curr_delta[0].size() == layer_->out_shape()[0].size());

    backward_activation(*out_grad[0], *out_data[0], curr_delta);

    fill_tensor(*prev_delta, float_t(0));

    for (serial_size_t i = 0; i < prev_out.size(); i++) {
      kernels::tiny_quantized_conv2d_back_kernel(*params_c_,
        *prev_out[i], W, dW[i], db[i], curr_delta[i], &(*prev_delta)[i]);
    }

    if (params_c_->pad_type == padding::same) {
      copy_and_unpad_delta(cws.prev_delta_padded_, *in_grad[0]);
    }
  }
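
  // Gradient slot convention used by the backward passes: in_grad[0] receives
  // the delta propagated to the previous layer, in_grad[1] the weight gradient
  // dW, and in_grad[2] the bias gradient db. out_grad[1] holds the current
  // layer's delta, which backward_activation first derives from out_grad[0]
  // and out_data[0]. With padding::same the previous delta is accumulated into
  // the padded worker buffer and then unpadded back into in_grad[0].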
  // deconvolution forward pass
  void deconv2d(const std::vector<tensor_t*>& in_data,
                std::vector<tensor_t*>& out_data) override {
    (*deconv_layer_worker_storage_).prev_out_ = in_data[0];
    const vec_t& W    = (*in_data[1])[0];
    const vec_t& bias = (*in_data[2])[0];
    tensor_t&    a    = *out_data[1];
    const tensor_t& in = *in_data[0];

    fill_tensor(a, float_t(0), params_d_->out.size());

    kernels::tiny_deconv2d_kernel(*params_d_,
      in, W, bias, a, layer_->parallelize());

    copy_and_unpad_output(a);
    a = *(*deconv_layer_worker_storage_).curr_out_unpadded_;
  }
  // quantized deconvolution forward pass
  void deconv2d_q(const std::vector<tensor_t*>& in_data,
                  std::vector<tensor_t*>& out_data) override {
    (*deconv_layer_worker_storage_).prev_out_ = in_data[0];
    const tensor_t& in = *in_data[0];
    const vec_t& W     = (*in_data[1])[0];
    const vec_t& bias  = (*in_data[2])[0];
    tensor_t&    a     = *out_data[1];

    fill_tensor(a, float_t(0), params_d_->out.size());

    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_deconv2d_kernel(*params_d_,
        in[i], W, bias, a[i], layer_->parallelize());
    }

    copy_and_unpad_output(a);
    a = *(*deconv_layer_worker_storage_).curr_out_unpadded_;
  }
  // quantized deconvolution forward pass taking explicit range tensors
  void deconv2d_eq(const std::vector<tensor_t*>& in_data,
                   std::vector<tensor_t*>& out_data) override {
    (*deconv_layer_worker_storage_).prev_out_ = in_data[0];
    const tensor_t& in   = *in_data[0];
    const vec_t& W       = (*in_data[1])[0];
    const vec_t& bias    = (*in_data[2])[0];
    const tensor_t& in_r = *in_data[3];
    const vec_t& W_r     = (*in_data[4])[0];
    const vec_t& b_r     = (*in_data[5])[0];
    tensor_t& a   = *out_data[1];
    tensor_t& a_r = *out_data[2];

    fill_tensor(a, float_t(0), params_d_->out.size());

    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_deconv2d_kernel(*params_d_,
        in[i], W, bias, in_r[i], W_r, b_r, a[i], a_r[i], layer_->parallelize());
    }

    copy_and_unpad_output(a);
    a = *(*deconv_layer_worker_storage_).curr_out_unpadded_;
  }
  // deconvolution backward pass
  void deconv2d(const std::vector<tensor_t*>& in_data,
                const std::vector<tensor_t*>& out_data,
                std::vector<tensor_t*>& out_grad,
                std::vector<tensor_t*>& in_grad) override {
    auto& cws = *deconv_layer_worker_storage_;

    if (params_d_->pad_type == padding::same)
      copy_and_pad_delta(cws.curr_delta_padded, *in_grad[0]);

    const tensor_t& prev_out = *(cws.prev_out_);
    const vec_t& W = (*in_data[1])[0];
    tensor_t& dW = *in_grad[1];
    tensor_t& db = *in_grad[2];
    tensor_t& curr_delta = (params_d_->pad_type == padding::same) ?
                           cws.curr_delta_padded : *out_grad[1];
    tensor_t* prev_delta = in_grad[0];

    assert(W.size() == params_d_->weight.size());
    assert(dW[0].size() == params_d_->weight.size());
    assert(curr_delta[0].size() == layer_->out_shape()[0].size());

    backward_activation(*out_grad[0], *out_data[0], curr_delta);

    fill_tensor(*prev_delta, float_t(0));

    kernels::tiny_deconv2d_back_kernel(*params_d_,
      prev_out, W, dW, db, curr_delta, prev_delta);
  }
  // quantized deconvolution backward pass
  void deconv2d_q(const std::vector<tensor_t*>& in_data,
                  const std::vector<tensor_t*>& out_data,
                  std::vector<tensor_t*>& out_grad,
                  std::vector<tensor_t*>& in_grad) override {
    auto& cws = *deconv_layer_worker_storage_;

    if (params_d_->pad_type == padding::same)
      copy_and_pad_delta(cws.curr_delta_padded, *in_grad[0]);

    const tensor_t& prev_out = *(cws.prev_out_);
    const vec_t& W = (*in_data[1])[0];
    tensor_t& dW = *in_grad[1];
    tensor_t& db = *in_grad[2];
    tensor_t& curr_delta = (params_d_->pad_type == padding::same) ?
                           cws.curr_delta_padded : *out_grad[1];
    tensor_t* prev_delta = in_grad[0];

    assert(W.size() == params_d_->weight.size());
    assert(dW[0].size() == params_d_->weight.size());
    assert(curr_delta[0].size() == layer_->out_shape()[0].size());

    backward_activation(*out_grad[0], *out_data[0], curr_delta);

    fill_tensor(*prev_delta, float_t(0));

    for (serial_size_t i = 0; i < prev_out.size(); i++) {
      kernels::tiny_quantized_deconv2d_back_kernel(*params_d_,
        prev_out[i], W, dW[i], db[i], curr_delta[i], &(*prev_delta)[i]);
    }
  }
  void maxpool(const std::vector<tensor_t*>& in_data,
               std::vector<tensor_t*>& out_data) override {
    // no-op check, presumably to keep the worker storage member referenced
    if (max_pooling_layer_worker_storage_) {}
    // ...
  }
  void maxpool(const std::vector<tensor_t*>& in_data,
               const std::vector<tensor_t*>& out_data,
               std::vector<tensor_t*>& out_grad,
               std::vector<tensor_t*>& in_grad) override {
    // ...
  }
  void fully(const std::vector<tensor_t*>& in_data,
             std::vector<tensor_t*>& out_data) override {
    // ...
  }
  // quantized fully connected forward pass (requires gemmlowp)
  void fully_q(const std::vector<tensor_t*>& in_data,
               std::vector<tensor_t*>& out_data) override {
#ifdef CNN_USE_GEMMLOWP
    const tensor_t& in = *in_data[0];
    const vec_t&    W  = (*in_data[1])[0];
    tensor_t&       a  = *out_data[1];

    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_fully_connected_kernel(*params_f_,
        in[i], W, params_f_->has_bias_ ? (*in_data[2])[0] : vec_t(),
        a[i], layer_->parallelize());
    }
#endif  // CNN_USE_GEMMLOWP
  }
  // quantized fully connected forward pass taking explicit range tensors
  void fully_eq(const std::vector<tensor_t*>& in_data,
                std::vector<tensor_t*>& out_data) override {
#ifdef CNN_USE_GEMMLOWP
    const tensor_t& in   = *in_data[0];
    const vec_t& W       = (*in_data[1])[0];
    vec_t& b             = (*in_data[2])[0];
    const tensor_t& in_r = *in_data[3];
    const vec_t& W_r     = (*in_data[4])[0];
    const vec_t& b_r     = (*in_data[5])[0];
    tensor_t& a   = *out_data[1];
    tensor_t& a_r = *out_data[2];

    for (serial_size_t i = 0; i < in.size(); i++) {
      kernels::tiny_quantized_fully_connected_kernel(*params_f_,
        in[i], W, b, in_r[i], W_r, b_r, a[i], a_r[i], layer_->parallelize());
    }
#endif  // CNN_USE_GEMMLOWP
  }
  void fully(const std::vector<tensor_t*>& in_data,
             const std::vector<tensor_t*>& out_data,
             std::vector<tensor_t*>& out_grad,
             std::vector<tensor_t*>& in_grad) override {
    // ...
  }
  // quantized fully connected backward pass (requires gemmlowp)
  void fully_q(const std::vector<tensor_t*>& in_data,
               const std::vector<tensor_t*>& out_data,
               std::vector<tensor_t*>& out_grad,
               std::vector<tensor_t*>& in_grad) override {
#ifdef CNN_USE_GEMMLOWP
    const tensor_t& prev_out = *in_data[0];
    const vec_t& W = (*in_data[1])[0];
    tensor_t& dW = *in_grad[1];
    tensor_t& db = *in_grad[2];
    tensor_t& prev_delta = *in_grad[0];
    tensor_t& curr_delta = *out_grad[1];

    backward_activation(*out_grad[0], *out_data[0], curr_delta);

    for (serial_size_t i = 0; i < prev_out.size(); i++) {
      kernels::tiny_quantized_fully_connected_back_kernel(*params_f_, prev_out[i],
        W, dW[i], prev_delta[i], curr_delta[i], db[i], layer_->parallelize());
    }
#endif  // CNN_USE_GEMMLOWP
  }
  backend_t type() const override { return default_engine(); }
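
  // Usage sketch (hypothetical caller, not part of this header), assuming the
  // tiny_dnn::core namespace and a fully constructed backend object:
  //
  //   core::tiny_backend b(&fc_params, grad_fn);
  //   if (b.type() == core::default_engine()) {
  //     // running on the default engine reported by this backend
  //   }
  //
  // fc_params and grad_fn are illustrative; backend_t and default_engine()
  // are assumed to come from the backend header included above.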
  std::vector<std::vector<serial_size_t>>* out2in_;
  std::vector<serial_size_t>* in2out_;

  // callbacks bound in the constructors above
  std::function<void(const tensor_t&)> copy_and_pad_input;
  std::function<void(const tensor_t&)> copy_and_unpad_output;
  std::function<void(const tensor_t&, tensor_t&)> copy_and_unpad_delta;
  std::function<void(const tensor_t&, tensor_t&)> copy_and_pad_delta;
  std::function<void(const tensor_t&,
                     const tensor_t&, tensor_t&)> backward_activation;