tiny_dnn  1.0.0
A header-only, dependency-free deep learning framework in C++11
layer_factory_impl.h
1 /*
2  Copyright (c) 2013, Taiga Nomi
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are met:
7  * Redistributions of source code must retain the above copyright
8  notice, this list of conditions and the following disclaimer.
9  * Redistributions in binary form must reproduce the above copyright
10  notice, this list of conditions and the following disclaimer in the
11  documentation and/or other materials provided with the distribution.
12  * Neither the name of the <organization> nor the
13  names of its contributors may be used to endorse or promote products
14  derived from this software without specific prior written permission.
15 
16  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
17  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
20  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 #pragma once
28 #include <algorithm>
29 #include <memory>
30 #include <vector>
31 #include <map>
32 #include <string>
33 #include <limits>
34 #include <unordered_map>
35 
36 #include "caffe.pb.h"
37 
38 #include "tiny_dnn/layers/convolutional_layer.h"
39 #include "tiny_dnn/layers/deconvolutional_layer.h"
40 #include "tiny_dnn/layers/fully_connected_layer.h"
41 #include "tiny_dnn/layers/average_pooling_layer.h"
42 #include "tiny_dnn/layers/max_pooling_layer.h"
43 #include "tiny_dnn/layers/linear_layer.h"
44 #include "tiny_dnn/layers/lrn_layer.h"
45 #include "tiny_dnn/layers/dropout_layer.h"
46 
48 
49 #ifdef _MSC_VER
50 #define _NOMINMAX
51 #include <io.h>
52 #include <fcntl.h>
53 #define CNN_OPEN_BINARY(filename) open(filename, _O_RDONLY|_O_BINARY)
54 #define CNN_OPEN_TXT(filename) open(filename, _O_RDONLY)
55 #pragma warning(push)
56 #pragma warning(disable:4996)
57 #else
58 #include <unistd.h>
59 #include <sys/types.h>
60 #include <sys/stat.h>
61 #include <fcntl.h>
62 #define CNN_OPEN_BINARY(filename) open(filename, O_RDONLY)
63 #define CNN_OPEN_TXT(filename) open(filename, O_RDONLY)
64 #endif
65 
66 namespace tiny_dnn {
67 namespace detail {
68 
69 inline void read_proto_from_text(const std::string& prototxt,
70  google::protobuf::Message *message) {
71  int fd = CNN_OPEN_TXT(prototxt.c_str());
72  if (fd == -1) {
73  throw nn_error("file not fonud: " + prototxt);
74  }
75 
76  google::protobuf::io::FileInputStream input(fd);
77  input.SetCloseOnDelete(true);
78 
79  if (!google::protobuf::TextFormat::Parse(&input, message)) {
80  throw nn_error("failed to parse");
81  }
82 }
83 
84 inline void read_proto_from_binary(const std::string& protobinary,
85  google::protobuf::Message *message) {
86  int fd = CNN_OPEN_BINARY(protobinary.c_str());
87  google::protobuf::io::FileInputStream rawstr(fd);
88  google::protobuf::io::CodedInputStream codedstr(&rawstr);
89 
90  rawstr.SetCloseOnDelete(true);
91  codedstr.SetTotalBytesLimit(std::numeric_limits<int>::max(),
92  std::numeric_limits<int>::max() / 2);
93 
94  if (!message->ParseFromCodedStream(&codedstr)) {
95  throw nn_error("failed to parse");
96  }
97 }
98 
99 inline std::shared_ptr<weight_init::function>
100 create_filler(const std::string& filler) {
101  if (filler == "xavier") {
102  return std::make_shared<weight_init::xavier>();
103  } else if (filler == "constant") {
104  return std::make_shared<weight_init::constant>();
105  } else if (filler == "gaussian") {
106  return std::make_shared<weight_init::gaussian>();
107  } else {
108  throw nn_error("unsupported filler type");
109  }
110 }
111 
112 template <typename param>
113 inline bool get_kernel_size_2d(const param& p, layer_size_t *kernel) {
114  if (p.has_kernel_w() && p.has_kernel_h()) {
115  if (p.kernel_w() != p.kernel_h()) {
116  throw nn_error("unsupported kernel shape");
117  }
118  *kernel = p.kernel_w();
119  return true;
120  }
121  return false;
122 }
123 
124 template <typename param>
125 inline bool get_kernel_size_2d(const param& p, layer_size_t *kernel_w, layer_size_t *kernel_h) {
126  if (p.has_kernel_w() && p.has_kernel_h()) {
127  *kernel_w = p.kernel_w();
128  *kernel_h = p.kernel_h();
129  return true;
130  }
131  return false;
132 }
133 
134 inline layer_size_t get_kernel_size_2d(const caffe::ConvolutionParameter& p) {
135  layer_size_t window_size;
136  if (!get_kernel_size_2d(p, &window_size)) {
137  if (p.kernel_size_size() > 1) {
138  throw nn_error("unsupported kernel shape");
139  }
140  window_size = p.kernel_size(0);
141  }
142  return window_size;
143 }
144 
145 inline std::shared_ptr<layer> create_max_pool(layer_size_t pool_size_w,
146  layer_size_t pool_size_h,
147  layer_size_t stride_w,
148  layer_size_t stride_h,
149  padding pad_type,
150  const shape_t& bottom_shape,
151  shape_t *top_shape) {
152  using max_pool = max_pooling_layer<activation::identity>;
153  auto mp = std::make_shared<max_pool>(bottom_shape.width_,
154  bottom_shape.height_,
155  bottom_shape.depth_,
156  pool_size_w, pool_size_h, stride_w, stride_h, pad_type);
157 
158  *top_shape = mp->out_shape()[0];
159  mp->init_weight();
160 
161  return mp;
162 }
163 
164 inline std::shared_ptr<layer> create_ave_pool(layer_size_t pool_size_w,
165  layer_size_t pool_size_h,
166  layer_size_t stride_w,
167  layer_size_t stride_h,
168  padding pad_type,
169  const shape_t& bottom_shape,
170  shape_t *top_shape) {
171  using ave_pool = average_pooling_layer<activation::identity>;
172  auto ap = std::make_shared<ave_pool>(bottom_shape.width_,
173  bottom_shape.height_,
174  bottom_shape.depth_,
175  pool_size_w, pool_size_h, stride_w, stride_h, pad_type);
176 
177  // tiny-dnn has a trainable parameter in its average-pooling layer
178  float_t weight = float_t(1) / (pool_size_w * pool_size_h);
179 
180  vec_t& w = *ap->weights()[0];
181  vec_t& b = *ap->weights()[1];
182 
183  std::fill(w.begin(), w.end(), weight);
184  std::fill(b.begin(), b.end(), float_t(0));
185 
186  *top_shape = ap->out_shape()[0];
187  ap->init_weight();
188  ap->set_trainable(false);
189 
190  return ap;
191 }
192 
193 inline
194 std::shared_ptr<layer> create_softmax(const caffe::LayerParameter& layer,
195  const shape_t& bottom_shape, shape_t *) {
196  auto sm = std::make_shared<linear_layer<activation::softmax>>(
197  bottom_shape.size());
198  sm->init_weight();
199  return sm;
200 }
201 
202 inline
203 std::shared_ptr<layer> create_sigmoid(const caffe::LayerParameter& layer,
204  const shape_t& bottom_shape, shape_t *) {
205  auto ce = std::make_shared<linear_layer<activation::sigmoid>>(
206  bottom_shape.size());
207  return ce;
208 }
209 
210 inline
211 std::shared_ptr<layer> create_tanh(const caffe::LayerParameter& layer,
212  const shape_t& bottom_shape, shape_t *) {
213  auto tanh = std::make_shared<linear_layer<activation::tan_h>>(
214  bottom_shape.size());
215  return tanh;
216 }
217 
218 inline
219 std::shared_ptr<layer> create_power(const caffe::LayerParameter& layer,
220  const shape_t& bottom_shape, shape_t *) {
221  auto power = std::make_shared<power_layer>(bottom_shape, layer.power_param().power(), layer.power_param().scale());
222  return power;
223 }
224 
225 
226 inline
227 std::shared_ptr<layer> create_pooling(const caffe::LayerParameter& layer,
228  const shape_t& bottom_shape,
229  shape_t *top_shape) {
230  if (!layer.has_pooling_param()) {
231  throw nn_error("pool param missing");
232  }
233 
234  auto pool_param = layer.pooling_param();
235 
236  layer_size_t h_stride = 0;
237  layer_size_t w_stride = 0;
238  layer_size_t pool_size_w = 0;
239  layer_size_t pool_size_h = 0;
240  layer_size_t h_pad = 0;
241  layer_size_t w_pad = 0;
242  padding pad_type = padding::valid;
243 
244  if (!get_kernel_size_2d(pool_param, &pool_size_w, &pool_size_h)) {
245  pool_size_w = pool_size_h = pool_param.kernel_size();
246  }
247 
248  if (pool_param.has_stride() || pool_param.has_stride_h()) {
249  h_stride = pool_param.has_stride() ?
250  pool_param.stride() : pool_param.stride_h();
251  }
252 
253  if (pool_param.has_stride() || pool_param.has_stride_w()) {
254  w_stride = pool_param.has_stride() ?
255  pool_param.stride() : pool_param.stride_w();
256  }
257 
258  if (pool_param.has_pad() || pool_param.has_pad_w()) {
259  w_pad = pool_param.has_pad() ?
260  pool_param.pad() : pool_param.pad_w();
261  }
262 
263  if (pool_param.has_pad() || pool_param.has_pad_h()) {
264  h_pad = pool_param.has_pad() ?
265  pool_param.pad() : pool_param.pad_h();
266  }
267 
268  if (w_pad != 0) {
269  if (w_pad == pool_size_w - 1) {
270  pad_type = padding::same;
271  }
272  else {
273  throw nn_error("unsupported padding type");
274  }
275  }
276 
277  if (h_pad != 0) {
278  if (h_pad == pool_size_h - 1) {
279  pad_type = padding::same;
280  }
281  else {
282  throw nn_error("unsupported padding type");
283  }
284  }
285 
286  if (pool_param.has_pool()) {
287  auto type = pool_param.pool();
288 
289  switch (type) {
290  case caffe::PoolingParameter_PoolMethod_MAX:
291  return create_max_pool(pool_size_w, pool_size_h, w_stride, h_stride, pad_type,
292  bottom_shape, top_shape);
293  case caffe::PoolingParameter_PoolMethod_AVE:
294  return create_ave_pool(pool_size_w, pool_size_h, w_stride, h_stride, pad_type,
295  bottom_shape, top_shape);
296  default:
297  throw nn_error("unsupported layer type");
298  }
299  }
300 
301  // default: max-pool
302  return create_max_pool(pool_size_w, pool_size_h, w_stride, h_stride, pad_type, bottom_shape, top_shape);
303 }
304 
305 inline
306 std::shared_ptr<layer> create_relu(const caffe::LayerParameter& layer,
307  const shape_t& bottom_shape, shape_t *) {
308  auto relu = std::make_shared<linear_layer<activation::relu>>(
309  bottom_shape.size());
310  return relu;
311 }
312 
313 inline std::shared_ptr<layer> create_batchnorm(const caffe::LayerParameter& layer,
314  const shape_t& bottom_shape, shape_t *top_shape) {
315  using bn_layer = batch_normalization_layer;
316 
317  *top_shape = bottom_shape;
318 
319  float_t eps = 1e-5f;
320  float_t momentum = 0.999f;
321 
322  if (layer.has_batch_norm_param()) {
323  auto bn_param = layer.batch_norm_param();
324 
325  if (bn_param.has_eps()) {
326  eps = bn_param.eps();
327  }
328  if (bn_param.has_moving_average_fraction()) {
329  momentum = bn_param.moving_average_fraction();
330  }
331  }
332 
333  auto bn = std::make_shared<bn_layer>(bottom_shape.area(), bottom_shape.depth_, eps, momentum, net_phase::test);
334 
335  // weight
336  if (layer.blobs_size() > 0) {
337  auto global_stats = layer.blobs();
338  if (global_stats.size() != 3) {
339  throw std::runtime_error("unexpected bn stored statistics");
340  }
341 
342  float_t scale_factor = global_stats.Get(2).data(0) == 0 ? 0 : 1 / global_stats.Get(2).data(0);
343  vec_t mean(bottom_shape.depth_);
344  vec_t variance(bottom_shape.depth_);
345 
346  for (size_t i = 0; i < mean.size(); i++) {
347  mean[i] = global_stats.Get(0).data(i) * scale_factor;
348  variance[i] = global_stats.Get(1).data(i) * scale_factor;
349  }
350  bn->set_mean(mean);
351  bn->set_variance(variance);
352  }
353 
354  return bn;
355 }
356 
357 
358 inline void load_weights_fullyconnected(const caffe::LayerParameter& src,
359  layer *dst) {
360  auto weights = src.blobs(0);
361  int curr = 0;
362 
363  const auto dst_out_size = dst->out_size();
364  const auto dst_in_size = dst->in_size();
365 
366  if (dst_out_size * dst_in_size !=
367  static_cast<serial_size_t>(weights.data_size())) {
368  throw nn_error(
369  std::string("layer size mismatch!") +
370  "caffe(" + src.name() + "):" + to_string(weights.data_size()) + "\n" +
371  "tiny-dnn(" + dst->layer_type() + "):" + to_string(dst->weights().size()));
372  }
373 
374  vec_t& w = *dst->weights()[0];
375  vec_t& b = *dst->weights()[1];
376 
377  // fill weights
378  for (size_t o = 0; o < dst_out_size; o++) {
379  for (size_t i = 0; i < dst_in_size; i++) {
380  // TODO: how to access the weights?
381  //dst->weight()[i * dst->out_size() + o] = weights.data(curr++); // transpose
382  w[i * dst_out_size + o] = weights.data(curr++); // transpose
383  }
384  }
385 
386  // fill bias
387  if (src.inner_product_param().bias_term()) {
388  auto biases = src.blobs(1);
389  for (size_t o = 0; o < dst_out_size; o++) {
390  // TODO: how to access the biases?
391  //dst->bias()[o] = biases.data(o);
392  b[o] = biases.data(o);
393  }
394  }
395 }
396 
397 inline std::shared_ptr<layer> create_fullyconnected(
398  const caffe::LayerParameter& layer,
399  const shape_t& bottom_shape, shape_t *top_shape) {
400  using fc_layer = fully_connected_layer<activation::identity>;
401 
402  if (!layer.has_inner_product_param()) {
403  throw nn_error("inner-product param missing");
404  }
405 
406  layer_size_t dim_input = 0, dim_output = 0;
407  bool has_bias = true;
408 
409  auto ip_param = layer.inner_product_param();
410  has_bias = ip_param.bias_term();
411 
412  dim_output = ip_param.num_output();
413  dim_input = bottom_shape.size();
414 
415  auto ip = std::make_shared<fc_layer>(dim_input, dim_output, has_bias);
416 
417  // filler
418  if (ip_param.has_weight_filler()) {
419  ip->weight_init(create_filler(ip_param.weight_filler().type()));
420  }
421 
422  if (ip_param.has_bias_filler()) {
423  ip->bias_init(create_filler(ip_param.bias_filler().type()));
424  }
425 
426  // weight
427  if (layer.blobs_size() > 0) {
428  load_weights_fullyconnected(layer, ip.get());
429  }
430 
431  // TODO: check if it works
432  *top_shape = ip->out_shape()[0];
433  return ip;
434 }
435 
436 inline void load_weights_conv(const caffe::LayerParameter& src, layer *dst) {
437  // fill weight
438  auto weights = src.blobs(0);
439 
440  //TODO: check if it works
441  //int out_channels = dst->out_shape().depth_;
442  //int in_channels = dst->in_shape().depth_;
443  int out_channels = dst->out_data_shape()[0].depth_;
444  int in_channels = dst->in_data_shape()[0].depth_;
445 
446  connection_table table;
447  auto conv_param = src.convolution_param();
448  int dst_idx = 0;
449  int src_idx = 0;
450  int window_size = get_kernel_size_2d(conv_param);
451 
452  if (conv_param.has_group()) {
453  table = connection_table(conv_param.group(), in_channels, out_channels);
454  }
455 
456  vec_t& w = *dst->weights()[0];
457  vec_t& b = *dst->weights()[1];
458 
459  // fill weights
460  for (int o = 0; o < out_channels; o++) {
461  for (int i = 0; i < in_channels; i++) {
462  if (!table.is_connected(o, i)) {
463  dst_idx += window_size * window_size;
464  continue;
465  }
466  for (int x = 0; x < window_size * window_size; x++) {
467  //TODO
468  //dst->weight()[dst_idx++] = weights.data(src_idx++);
469  w[dst_idx++] = weights.data(src_idx++);
470  }
471  }
472  }
473 
474  // fill bias
475  if (conv_param.bias_term()) {
476  auto biases = src.blobs(1);
477  for (int o = 0; o < out_channels; o++) {
478  //TODO
479  //dst->bias()[o] = biases.data(o);
480  b[o] = biases.data(o);
481  }
482  }
483 }
484 
485 inline void load_weights_pool(const caffe::LayerParameter& src, layer *dst) {
486  auto pool_param = src.pooling_param();
487 
488  //TODO
489  //if (dst->weight().size()) {
490  if (dst->weights().size()) {
491  layer_size_t pool_size = 0;
492 
493  if (!get_kernel_size_2d(pool_param, &pool_size)) {
494  pool_size = pool_param.kernel_size();
495  }
496 
497  // tiny-dnn has a trainable parameter in its average-pooling layer
498  float_t weight = float_t(1) / sqr(pool_size);
499 
500  //TODO
501  /*if (!dst->weight().empty()) {
502  std::fill(dst->weight().begin(), dst->weight().end(), weight);
503  }
504  if (!dst->bias().empty()) {
505  std::fill(dst->bias().begin(), dst->bias().end(), float_t(0));
506  dst->init_bias();
507  }*/
508 
509  vec_t& w = *dst->weights()[0];
510  vec_t& b = *dst->weights()[1];
511 
512  if (!w.empty()) {
513  std::fill(w.begin(), w.end(), weight);
514  }
515  if (!b.empty()) {
516  std::fill(b.begin(), b.end(), float_t(0));
517  //dst->init_bias();
518  }
519  }
520 }
521 
522 inline
523 std::shared_ptr<layer> create_lrn(const caffe::LayerParameter& layer,
524  const shape_t& bottom_shape,
525  shape_t *top_shape) {
526  using lrn_layer = lrn_layer<activation::identity>;
527 
528  if (!layer.has_lrn_param()) {
529  throw nn_error("lrn param missing");
530  }
531 
532  auto lrn_param = layer.lrn_param();
533  layer_size_t local_size = 5;
534  float_t alpha = 1;
535  float_t beta = 5;
536  norm_region region = norm_region::across_channels;
537 
538  if (lrn_param.has_local_size()) local_size = lrn_param.local_size();
539  if (lrn_param.has_alpha()) alpha = lrn_param.alpha();
540  if (lrn_param.has_beta()) beta = lrn_param.beta();
541  if (lrn_param.has_norm_region()) {
542  if (lrn_param.norm_region() == caffe::LRNParameter_NormRegion_WITHIN_CHANNEL) // NOLINT
543  region = norm_region::within_channels;
544  }
545 
546  auto lrn = std::make_shared<lrn_layer>(bottom_shape.width_,
547  bottom_shape.height_,
548  local_size,
549  bottom_shape.depth_,
550  alpha, beta, region);
551  return lrn;
552 }
553 
554 inline
555 std::shared_ptr<layer> create_dropout(const caffe::LayerParameter& layer,
556  const shape_t& bottom_shape,
557  shape_t *top_shape) {
558  if (!layer.has_dropout_param()) {
559  throw nn_error("dropout param missing");
560  }
561 
562  float_t dropout_rate = float_t(0.5);
563 
564  if (layer.dropout_param().has_dropout_ratio()) {
565  dropout_rate = layer.dropout_param().dropout_ratio();
566  }
567 
568  auto dropout = std::make_shared<dropout_layer>(bottom_shape.size(),
569  dropout_rate,
570  net_phase::test);
571  return dropout;
572 }
573 
574 inline
575 std::shared_ptr<layer> create_convlayer(const caffe::LayerParameter& layer,
576  const shape_t& bottom_shape,
577  shape_t *top_shape) {
578  using conv_layer = convolutional_layer<activation::identity>;
579 
580  if (!layer.has_convolution_param()) {
581  throw nn_error("convolution param missing");
582  }
583 
584  // layer parameters
585  layer_size_t in_width = 0, in_height = 0, window_size = 0;
586  layer_size_t in_channels = 0, out_channels = 0;
587  layer_size_t w_stride = 1, h_stride = 1;
588  bool has_bias = true;
589  padding pad_type = padding::valid;
590  connection_table table;
591 
592  auto conv_param = layer.convolution_param();
593 
594  // shape
595  out_channels = conv_param.num_output();
596  in_channels = bottom_shape.depth_;
597  in_width = bottom_shape.width_;
598  in_height = bottom_shape.height_;
599  has_bias = conv_param.bias_term();
600  window_size = get_kernel_size_2d(conv_param);
601 
602  // padding
603  if (conv_param.pad_size() == 1 ||
604  (conv_param.has_pad_w() && conv_param.has_pad_h())) {
605  uint32_t pad_w = conv_param.pad_size() == 1 ?
606  conv_param.pad(0) : conv_param.pad_w();
607 
608  uint32_t pad_h = conv_param.pad_size() == 1 ?
609  conv_param.pad(0) : conv_param.pad_h();
610 
611  if (pad_w != pad_h) {
612  throw nn_error("conv:not supported padding size");
613  }
614 
615  // 0 ... valid, (window_size-1)/2 ... same
616  if (pad_w == (window_size - 1) / 2) {
617  pad_type = padding::same;
618  } else if (pad_w == 0) {
619  pad_type = padding::valid;
620  } else {
621  throw nn_error("conv:not supported padding size");
622  }
623  }
624 
625  // stride
626  if (conv_param.stride_size() == 1 || conv_param.has_stride_h()) {
627  h_stride = conv_param.stride_size() == 1 ?
628  conv_param.stride(0) : conv_param.stride_h();
629  }
630 
631  if (conv_param.stride_size() == 1 || conv_param.has_stride_w()) {
632  w_stride = conv_param.stride_size() == 1 ?
633  conv_param.stride(0) : conv_param.stride_w();
634  }
635 
636  // group
637  if (conv_param.has_group()) {
638  table = connection_table(conv_param.group(), in_channels, out_channels);
639  }
640 
641  auto conv = std::make_shared<conv_layer>(in_width, in_height,
642  window_size,
643  in_channels, out_channels,
644  table,
645  pad_type,
646  has_bias,
647  w_stride, h_stride);
648  // filler
649  if (conv_param.has_weight_filler()) {
650  conv->weight_init(create_filler(conv_param.weight_filler().type()));
651  }
652 
653  if (conv_param.has_bias_filler()) {
654  conv->bias_init(create_filler(conv_param.bias_filler().type()));
655  }
656 
657  // set weight (optional)
658  if (layer.blobs_size() > 0) { // blobs(0)...weight, blobs(1)...bias
659  load_weights_conv(layer, conv.get());
660  }
661 
662  *top_shape = conv->out_shape()[0];
663  return conv;
664 }
665 
666 inline
667 std::shared_ptr<layer> create_deconvlayer(const caffe::LayerParameter& layer,
668  const shape_t& bottom_shape,
669  shape_t *top_shape) {
670  using deconv_layer = deconvolutional_layer<activation::identity>;
671 
672  if (!layer.has_convolution_param()) {
673  throw nn_error("deconvolution param missing");
674  }
675 
676  // layer parameters
677  layer_size_t in_width = 0, in_height = 0, window_size = 0;
678  layer_size_t in_channels = 0, out_channels = 0;
679  layer_size_t w_stride = 1, h_stride = 1;
680  bool has_bias = true;
681  padding pad_type = padding::valid;
682  connection_table table;
683 
684  auto deconv_param = layer.convolution_param();
685 
686  // shape
687  out_channels = deconv_param.num_output();
688  in_channels = bottom_shape.depth_;
689  in_width = bottom_shape.width_;
690  in_height = bottom_shape.height_;
691  has_bias = deconv_param.bias_term();
692  window_size = get_kernel_size_2d(deconv_param);
693 
694  // unpadding
695  if (deconv_param.pad_size() == 1 ||
696  (deconv_param.has_pad_w() && deconv_param.has_pad_h())) {
697  uint32_t unpad_w = deconv_param.pad_size() == 1 ?
698  deconv_param.pad(0) : deconv_param.pad_w();
699 
700  uint32_t unpad_h = deconv_param.pad_size() == 1 ?
701  deconv_param.pad(0) : deconv_param.pad_h();
702 
703  if (unpad_w != unpad_h) {
704  throw nn_error("deconv:not supported unpadding size");
705  }
706 
707  // 0 ... valid, (window_size-1)/2 ... same
708  if (unpad_w == (window_size - 1) / 2) {
709  pad_type = padding::same;
710  } else if (unpad_w == 0) {
711  pad_type = padding::valid;
712  } else {
713  throw nn_error("deconv:not supported unpadding size");
714  }
715  }
716 
717  // stride
718  if (deconv_param.stride_size() == 1 || deconv_param.has_stride_h()) {
719  h_stride = deconv_param.stride_size() == 1 ?
720  deconv_param.stride(0) : deconv_param.stride_h();
721  }
722 
723  if (deconv_param.stride_size() == 1 || deconv_param.has_stride_w()) {
724  w_stride = deconv_param.stride_size() == 1 ?
725  deconv_param.stride(0) : deconv_param.stride_w();
726  }
727 
728  // group
729  if (deconv_param.has_group()) {
730  table = connection_table(deconv_param.group(), in_channels, out_channels);
731  }
732 
733  auto deconv = std::make_shared<deconv_layer>(in_width, in_height,
734  window_size,
735  in_channels, out_channels,
736  table,
737  pad_type,
738  has_bias,
739  w_stride, h_stride);
740  // filler
741  if (deconv_param.has_weight_filler()) {
742  deconv->weight_init(create_filler(deconv_param.weight_filler().type()));
743  }
744 
745  if (deconv_param.has_bias_filler()) {
746  deconv->bias_init(create_filler(deconv_param.bias_filler().type()));
747  }
748 
749  // set weight (optional)
750  if (layer.blobs_size() > 0) { // blobs(0)...weight, blobs(1)...bias
751  load_weights_conv(layer, deconv.get());
752  }
753  //TODO
754  //*top_shape = deconv->out_shape();
755  *top_shape = deconv->out_shape()[0];
756  return deconv;
757 }
758 
759 inline bool layer_skipped(const std::string& type) {
760  if (type == "Data" || type == "EuclideanLoss" || type == "Input") return true;
761  return false;
762 }
763 
764 inline bool layer_supported(const std::string& type) {
765  static const char* supported[] = {
766  "InnerProduct", "Convolution", "Deconvolution", "Pooling",
767  "LRN", "Dropout",
768  "SoftmaxWithLoss", "SigmoidCrossEntropyLoss",
769  "ReLU", "Sigmoid", "TanH", "Softmax", "BatchNorm", "Power"
770  };
771 
772  for (size_t i = 0; i < sizeof(supported) / sizeof(supported[0]); i++) {
773  if (supported[i] == type) return true;
774  }
775  return false;
776 }
777 
778 inline bool layer_match(const std::string& caffetype,
779  const std::string& tiny_dnn_type) {
780  const char* conversions[][2] = {
781  { "InnerProduct", "fully-connected" },
782  { "Convolution", "conv" },
783  { "Deconvolution", "deconv" },
784  { "Pooling", "ave-pool" },
785  { "Pooling", "max-pool" }
786  };
787 
788  for (size_t i = 0; i < sizeof(conversions) / sizeof(conversions[0]); i++) {
789  if (conversions[i][0] == caffetype &&
790  conversions[i][1] == tiny_dnn_type) return true;
791  }
792  return false;
793 }
794 
795 inline std::shared_ptr<layer> create(const caffe::LayerParameter& layer,
796  const shape_t& in_shape,
797  shape_t* out_shape) {
798  const std::string layer_type = layer.type();
799 
800  if (layer_type == "Convolution") {
801  return detail::create_convlayer(layer, in_shape, out_shape);
802  }
803 
804  if (layer_type == "Deconvolution") {
805  return detail::create_deconvlayer(layer, in_shape, out_shape);
806  }
807 
808  if (layer_type == "InnerProduct") {
809  return detail::create_fullyconnected(layer, in_shape, out_shape);
810  }
811 
812  if (layer_type == "Pooling") {
813  return detail::create_pooling(layer, in_shape, out_shape);
814  }
815 
816  if (layer_type == "BatchNorm") {
817  return detail::create_batchnorm(layer, in_shape, out_shape);
818  }
819 
820  if (layer_type == "LRN") {
821  return detail::create_lrn(layer, in_shape, out_shape);
822  }
823 
824  if (layer_type == "Dropout") {
825  return detail::create_dropout(layer, in_shape, out_shape);
826  }
827 
828  if (layer_type == "SoftmaxWithLoss" ||
829  layer_type == "Softmax") {
830  return detail::create_softmax(layer, in_shape, out_shape);
831  }
832 
833  if (layer_type == "SigmoidCrossEntropyLoss" ||
834  layer_type == "Sigmoid") {
835  return detail::create_sigmoid(layer, in_shape, out_shape);
836  }
837 
838  if (layer_type == "ReLU") {
839  return detail::create_relu(layer, in_shape, out_shape);
840  }
841 
842  if (layer_type == "TanH") {
843  return detail::create_tanh(layer, in_shape, out_shape);
844  }
845 
846  if (layer_type == "Power") {
847  return detail::create_power(layer, in_shape, out_shape);
848  }
849 
850  throw nn_error("layer parser not found");
851 }
852 
853 inline void load(const caffe::LayerParameter& src, layer *dst) {
854  typedef std::function<void(const caffe::LayerParameter&, layer*)> factoryimpl; // NOLINT
855  std::unordered_map<std::string, factoryimpl> factory_registry;
856 
857  factory_registry["Convolution"] = detail::load_weights_conv;
858  factory_registry["Deconvolution"] = detail::load_weights_conv;
859  factory_registry["InnerProduct"] = detail::load_weights_fullyconnected;
860  factory_registry["Pooling"] = detail::load_weights_pool;
861 
862  if (factory_registry.find(src.type()) == factory_registry.end()) {
863  throw nn_error("layer parser not found");
864  }
865 
866  return factory_registry[src.type()](src, dst);
867 }
868 
869 
870 struct layer_node {
871  const caffe::LayerParameter *layer;
872  const layer_node *next; // top-side
873  const layer_node *prev; // bottom-side
874 
875  layer_node() : layer(0), next(0), prev(0) {}
876  explicit layer_node(const caffe::LayerParameter *l)
877  : layer(l), next(0), prev(0) {}
878 };
879 
880 // parse caffe net and interpret as single layer vector
881 class caffe_layer_vector {
882  public:
883  explicit caffe_layer_vector(const caffe::NetParameter& net_orig)
884  : net(net_orig) {
885  if (net.layers_size() > 0) {
886  upgradev1net(net_orig, &net);
887  }
888 
889  nodes.reserve(net.layer_size());
890 
891  for (int i = 0; i < net.layer_size(); i++) {
892  auto& l = net.layer(i);
893 
894  if (layer_table.find(l.name()) != layer_table.end()) continue;
895 
896  nodes.emplace_back(&l);
897  layer_table[l.name()] = &nodes.back();
898  }
899 
900  for (size_t i = 0; i < nodes.size(); i++) {
901  auto& l = nodes[i];
902 
903  if (l.layer->bottom_size() > 0 && blob_table[l.layer->bottom(0)]) {
904  auto& bottom = blob_table[l.layer->bottom(0)];
905  l.prev = bottom;
906  layer_table[bottom->layer->name()]->next = &l;
907  }
908 
909  if (l.layer->top_size() > 0) {
910  blob_table[l.layer->top(0)] = &l;
911  }
912  }
913 
914  auto root = std::find_if(nodes.begin(),
915  nodes.end(), [](const layer_node& n) {
916  return n.prev == 0;
917  });
918 
919  if (root == nodes.end()) {
920  throw nn_error("root layer not found");
921  }
922 
923  root_node = &*root;
924  const layer_node *current = &*root;
925 
926  while (current) {
927  node_list.push_back(current->layer);
928  current = current->next;
929  }
930  }
931 
932  size_t size() const {
933  return node_list.size();
934  }
935 
936  const caffe::LayerParameter& operator[] (size_t index) const {
937  return *(node_list[index]);
938  }
939 
940  private:
941  void upgradev1net(const caffe::NetParameter& old,
942  caffe::NetParameter *dst) const {
943  dst->CopyFrom(old);
944  dst->clear_layers();
945  dst->clear_layer();
946 
947  for (int i = 0; i < old.layers_size(); i++) {
948  upgradev1layer(old.layers(i), dst->add_layer());
949  }
950  }
951 
952  const char* v1type2name(caffe::V1LayerParameter_LayerType type) const {
953  switch (type) {
954  case caffe::V1LayerParameter_LayerType_NONE:
955  return "";
956  case caffe::V1LayerParameter_LayerType_ABSVAL:
957  return "AbsVal";
958  case caffe::V1LayerParameter_LayerType_ACCURACY:
959  return "Accuracy";
960  case caffe::V1LayerParameter_LayerType_ARGMAX:
961  return "ArgMax";
962  case caffe::V1LayerParameter_LayerType_BNLL:
963  return "BNLL";
964  case caffe::V1LayerParameter_LayerType_CONCAT:
965  return "Concat";
966  case caffe::V1LayerParameter_LayerType_CONTRASTIVE_LOSS:
967  return "ContrastiveLoss";
968  case caffe::V1LayerParameter_LayerType_CONVOLUTION:
969  return "Convolution";
970  case caffe::V1LayerParameter_LayerType_DECONVOLUTION:
971  return "Deconvolution";
972  case caffe::V1LayerParameter_LayerType_DATA:
973  return "Data";
974  case caffe::V1LayerParameter_LayerType_DROPOUT:
975  return "Dropout";
976  case caffe::V1LayerParameter_LayerType_DUMMY_DATA:
977  return "DummyData";
978  case caffe::V1LayerParameter_LayerType_EUCLIDEAN_LOSS:
979  return "EuclideanLoss";
980  case caffe::V1LayerParameter_LayerType_ELTWISE:
981  return "Eltwise";
982  case caffe::V1LayerParameter_LayerType_EXP:
983  return "Exp";
984  case caffe::V1LayerParameter_LayerType_FLATTEN:
985  return "Flatten";
986  case caffe::V1LayerParameter_LayerType_HDF5_DATA:
987  return "HDF5Data";
988  case caffe::V1LayerParameter_LayerType_HDF5_OUTPUT:
989  return "HDF5Output";
990  case caffe::V1LayerParameter_LayerType_HINGE_LOSS:
991  return "HingeLoss";
992  case caffe::V1LayerParameter_LayerType_IM2COL:
993  return "Im2col";
994  case caffe::V1LayerParameter_LayerType_IMAGE_DATA:
995  return "ImageData";
996  case caffe::V1LayerParameter_LayerType_INFOGAIN_LOSS:
997  return "InfogainLoss";
998  case caffe::V1LayerParameter_LayerType_INNER_PRODUCT:
999  return "InnerProduct";
1000  case caffe::V1LayerParameter_LayerType_LRN:
1001  return "LRN";
1002  case caffe::V1LayerParameter_LayerType_MEMORY_DATA:
1003  return "MemoryData";
1004  case caffe::V1LayerParameter_LayerType_MULTINOMIAL_LOGISTIC_LOSS:
1005  return "MultinomialLogisticLoss";
1006  case caffe::V1LayerParameter_LayerType_MVN:
1007  return "MVN";
1008  case caffe::V1LayerParameter_LayerType_POOLING:
1009  return "Pooling";
1010  case caffe::V1LayerParameter_LayerType_POWER:
1011  return "Power";
1012  case caffe::V1LayerParameter_LayerType_RELU:
1013  return "ReLU";
1014  case caffe::V1LayerParameter_LayerType_SIGMOID:
1015  return "Sigmoid";
1016  case caffe::V1LayerParameter_LayerType_SIGMOID_CROSS_ENTROPY_LOSS:
1017  return "SigmoidCrossEntropyLoss";
1018  case caffe::V1LayerParameter_LayerType_SILENCE:
1019  return "Silence";
1020  case caffe::V1LayerParameter_LayerType_SOFTMAX:
1021  return "Softmax";
1022  case caffe::V1LayerParameter_LayerType_SOFTMAX_LOSS:
1023  return "SoftmaxWithLoss";
1024  case caffe::V1LayerParameter_LayerType_SPLIT:
1025  return "Split";
1026  case caffe::V1LayerParameter_LayerType_SLICE:
1027  return "Slice";
1028  case caffe::V1LayerParameter_LayerType_TANH:
1029  return "TanH";
1030  case caffe::V1LayerParameter_LayerType_WINDOW_DATA:
1031  return "WindowData";
1032  case caffe::V1LayerParameter_LayerType_THRESHOLD:
1033  return "Threshold";
1034  default:
1035  throw nn_error("unknown v1 layer-type");
1036  }
1037  }
1038 
1039  void upgradev1layer(const caffe::V1LayerParameter& old,
1040  caffe::LayerParameter *dst) const {
1041  dst->Clear();
1042 
1043  for (int i = 0; i < old.bottom_size(); i++) {
1044  dst->add_bottom(old.bottom(i));
1045  }
1046 
1047  for (int i = 0; i < old.top_size(); i++) {
1048  dst->add_top(old.top(i));
1049  }
1050 
1051  if (old.has_name()) dst->set_name(old.name());
1052  if (old.has_type()) dst->set_type(v1type2name(old.type()));
1053 
1054  for (int i = 0; i < old.blobs_size(); i++) {
1055  dst->add_blobs()->CopyFrom(old.blobs(i));
1056  }
1057 
1058  for (int i = 0; i < old.param_size(); i++) {
1059  while (dst->param_size() <= i) dst->add_param();
1060  dst->mutable_param(i)->set_name(old.param(i));
1061  }
1062 
1063  #define COPY_PARAM(name) if (old.has_##name##_param()) dst->mutable_##name##_param()->CopyFrom(old.name##_param())
1064 
1065  COPY_PARAM(accuracy);
1066  COPY_PARAM(argmax);
1067  COPY_PARAM(concat);
1068  COPY_PARAM(contrastive_loss);
1069  COPY_PARAM(convolution);
1070  COPY_PARAM(data);
1071  COPY_PARAM(dropout);
1072  COPY_PARAM(dummy_data);
1073  COPY_PARAM(eltwise);
1074  COPY_PARAM(exp);
1075  COPY_PARAM(hdf5_data);
1076  COPY_PARAM(hdf5_output);
1077  COPY_PARAM(hinge_loss);
1078  COPY_PARAM(image_data);
1079  COPY_PARAM(infogain_loss);
1080  COPY_PARAM(inner_product);
1081  COPY_PARAM(lrn);
1082  COPY_PARAM(memory_data);
1083  COPY_PARAM(mvn);
1084  COPY_PARAM(pooling);
1085  COPY_PARAM(power);
1086  COPY_PARAM(relu);
1087  COPY_PARAM(sigmoid);
1088  COPY_PARAM(softmax);
1089  COPY_PARAM(slice);
1090  COPY_PARAM(tanh);
1091  COPY_PARAM(threshold);
1092  COPY_PARAM(window_data);
1093  COPY_PARAM(transform);
1094  COPY_PARAM(loss);
1095  #undef COPY_PARAM
1096  }
1097 
1098  caffe::NetParameter net;
1099  layer_node *root_node;
1100  /* layer name -> layer */
1101  std::map<std::string, layer_node*> layer_table;
1102  /* blob name -> bottom holder */
1103  std::map<std::string, layer_node*> blob_table;
1104  std::vector<layer_node> nodes;
1105  std::vector<const caffe::LayerParameter*> node_list;
1106 };
1107 
1108 } // namespace detail
1109 } // namespace tiny_dnn
1110 
1111 #ifdef _MSC_VER
1112 #pragma warning(pop)
1113 #endif
Cross-referenced definitions:
  layer_factory_impl.h:881  caffe_layer_vector
  layer.h:62                layer, base class of all NN layers
  nn_error.h:37             nn_error, error exception class for tiny-dnn
  nodes.h:85                nodes, basic class of various network types (sequential, multi-in/multi-out)
  power_layer.h:38          power_layer, element-wise pow: y = scale*x^factor
  slice_layer.h:42          slice_layer, slices an input into multiple outputs along a given slice dimension
  conv_params.h:40
  layer_factory_impl.h:870  layer_node
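
The functions in this header are internal helpers (namespace tiny_dnn::detail) that tiny-dnn's Caffe converter builds on. As a rough illustration only, the sketch below shows how they could be combined by hand: parse a binary .caffemodel, flatten the layer graph with caffe_layer_vector, and create each supported layer while threading one layer's output shape into the next layer's input shape. The include path, the build_from_caffemodel name, and the caller-supplied input_shape are assumptions made for this example; they are not part of the header.

// Minimal sketch, not part of this header: drives the detail:: helpers above by hand.
#include <iostream>
#include <string>

#include "layer_factory_impl.h"  // assumed include path; normally pulled in via the Caffe converter headers

inline void build_from_caffemodel(const std::string& binary_model,
                                  const tiny_dnn::shape_t& input_shape /* assumed known by the caller */) {
    // parse the binary .caffemodel into a NetParameter message
    caffe::NetParameter np;
    tiny_dnn::detail::read_proto_from_binary(binary_model, &np);

    // flatten the layer graph into bottom-to-top order
    tiny_dnn::detail::caffe_layer_vector layers(np);

    tiny_dnn::shape_t shape = input_shape;
    for (size_t i = 0; i < layers.size(); i++) {
        const caffe::LayerParameter& l = layers[i];

        if (tiny_dnn::detail::layer_skipped(l.type())) continue;
        if (!tiny_dnn::detail::layer_supported(l.type())) {
            throw tiny_dnn::nn_error("unsupported layer type: " + l.type());
        }

        // build the tiny-dnn layer; create() also loads any stored weights
        tiny_dnn::shape_t top_shape = shape;
        auto created = tiny_dnn::detail::create(l, shape, &top_shape);
        std::cout << l.name() << " -> " << created->layer_type() << std::endl;

        shape = top_shape;  // this layer's output feeds the next layer
    }
}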