tiny_dnn 1.0.0
A header-only, dependency-free deep learning framework in C++11
dropout_layer.h
/*
 Copyright (c) 2013, Taiga Nomi
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 * Redistributions of source code must retain the above copyright
 notice, this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright
 notice, this list of conditions and the following disclaimer in the
 documentation and/or other materials provided with the distribution.
 * Neither the name of the <organization> nor the
 names of its contributors may be used to endorse or promote products
 derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "tiny_dnn/util/util.h"
#include "tiny_dnn/layers/layer.h"
#include <algorithm>

namespace tiny_dnn {

/**
 * applies dropout to the input
 **/
class dropout_layer : public layer {
public:
    typedef activation::identity Activation;
    typedef layer Base;

    /**
     * @param in_dim       [in] number of elements of the input
     * @param dropout_rate [in] (0-1) fraction of the input units to be dropped
     * @param phase        [in] initial state of the dropout
     **/
    dropout_layer(serial_size_t in_dim, float_t dropout_rate, net_phase phase = net_phase::train)
        : Base({vector_type::data}, {vector_type::data}),
          phase_(phase),
          dropout_rate_(dropout_rate),
          // inverted-dropout scale, applied to the surviving units at train time
          scale_(float_t(1) / (float_t(1) - dropout_rate_)),
          in_size_(in_dim)
    {
        mask_.resize(1, std::vector<uint8_t>(in_dim));
        clear_mask();
    }

    dropout_layer(const dropout_layer& obj) = default;
    virtual ~dropout_layer() {}

#ifdef CNN_USE_DEFAULT_MOVE_CONSTRUCTORS
    dropout_layer(dropout_layer&& obj) = default;
    dropout_layer& operator=(const dropout_layer& obj) = default;
    dropout_layer& operator=(dropout_layer&& obj) = default;
#endif

    void set_dropout_rate(float_t rate)
    {
        dropout_rate_ = rate;
        scale_ = float_t(1) / (float_t(1) - dropout_rate_);
    }

    float_t dropout_rate() const {
        return dropout_rate_;
    }

    ///< number of incoming connections for each output unit
    serial_size_t fan_in_size() const override
    {
        return 1;
    }

    ///< number of outgoing connections for each input unit
    serial_size_t fan_out_size() const override
    {
        return 1;
    }

    std::vector<index3d<serial_size_t>> in_shape() const override {
        return { index3d<serial_size_t>(in_size_, 1, 1) };
    }

    std::vector<index3d<serial_size_t>> out_shape() const override {
        return { index3d<serial_size_t>(in_size_, 1, 1) };
    }

    void back_propagation(const std::vector<tensor_t*>& in_data,
                          const std::vector<tensor_t*>& out_data,
                          std::vector<tensor_t*>& out_grad,
                          std::vector<tensor_t*>& in_grad) override {
        tensor_t& prev_delta = *in_grad[0];
        const tensor_t& curr_delta = *out_grad[0];

        CNN_UNREFERENCED_PARAMETER(in_data);
        CNN_UNREFERENCED_PARAMETER(out_data);

        for (serial_size_t sample = 0; sample < static_cast<serial_size_t>(prev_delta.size()); ++sample) {
            // only units kept in the forward pass propagate gradient;
            // dropped units (mask == 0) receive none
            for (serial_size_t i = 0; i < static_cast<serial_size_t>(curr_delta[sample].size()); i++) {
                prev_delta[sample][i] = mask_[sample][i] * curr_delta[sample][i];
            }
        }
    }

    void forward_propagation(const std::vector<tensor_t*>& in_data,
                             std::vector<tensor_t*>& out_data) override {
        const tensor_t& in = *in_data[0];
        tensor_t& out = *out_data[0];

        const size_t sample_count = in.size();

        // grow the per-sample mask storage to match the batch size
        if (mask_.size() < sample_count) {
            mask_.resize(sample_count, mask_[0]);
        }

        for (size_t sample = 0; sample < sample_count; ++sample) {
            std::vector<uint8_t>& mask = mask_[sample];

            const vec_t& in_vec = in[sample];
            vec_t& out_vec = out[sample];

            if (phase_ == net_phase::train) {
                // draw a fresh mask: keep each unit with probability (1 - dropout_rate_)
                for (size_t i = 0; i < in_vec.size(); i++)
                    mask[i] = bernoulli(float_t(1) - dropout_rate_);

                // scale the kept units so the expected activation is unchanged
                for (size_t i = 0; i < in_vec.size(); i++)
                    out_vec[i] = mask[i] * scale_ * in_vec[i];
            }
            else {
                // test phase: identity pass-through (inverted dropout)
                for (size_t i = 0, end = in_vec.size(); i < end; i++)
                    out_vec[i] = in_vec[i];
            }
        }
    }
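
    // Why the train-time scaling (inverted dropout): with keep probability
    // q = 1 - dropout_rate_, each unit satisfies
    //   E[out_i] = q * scale_ * in_i = q * (1/q) * in_i = in_i,
    // so the expected activation seen by the next layer matches the
    // test-phase pass-through branch and no rescaling is needed at inference.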

    /**
     * set dropout-context (training-phase or test-phase)
     **/
    void set_context(net_phase ctx) override
    {
        phase_ = ctx;
    }

    std::string layer_type() const override { return "dropout"; }

    // currently used by tests only
    const std::vector<uint8_t>& get_mask(serial_size_t sample_index) const {
        return mask_[sample_index];
    }

    void clear_mask() {
        for (auto& sample : mask_) {
            std::fill(sample.begin(), sample.end(), 0);
        }
    }

    template <class Archive>
    static void load_and_construct(Archive & ar, cereal::construct<dropout_layer> & construct) {
        net_phase phase;
        float_t dropout_rate;
        serial_size_t in_size;

        ar(cereal::make_nvp("in_size", in_size),
           cereal::make_nvp("dropout_rate", dropout_rate),
           cereal::make_nvp("phase", phase));
        construct(in_size, dropout_rate, phase);
    }

    template <class Archive>
    void serialize(Archive & ar) {
        layer::serialize_prolog(ar);
        ar(cereal::make_nvp("in_size", in_size_),
           cereal::make_nvp("dropout_rate", dropout_rate_),
           cereal::make_nvp("phase", phase_));
    }

private:
    net_phase phase_;
    float_t dropout_rate_;
    float_t scale_;
    serial_size_t in_size_;
    std::vector<std::vector<uint8_t>> mask_;
};

} // namespace tiny_dnn
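
A minimal usage sketch (not part of the header): dropout is typically inserted between fully-connected layers, and the network switches every dropout layer's context before inference. The layer sizes are arbitrary, and the tan_h activation and set_netphase call are assumed from the tiny_dnn 1.0.0 examples; treat this as a sketch, not a verbatim recipe.

#include "tiny_dnn/tiny_dnn.h"

using namespace tiny_dnn;
using namespace tiny_dnn::activation;

int main() {
    network<sequential> nn;

    // 28x28 inputs -> 100 hidden units, half of which are dropped per
    // training sample -> 10 outputs
    nn << fully_connected_layer<tan_h>(28 * 28, 100)
       << dropout_layer(100, float_t(0.5))
       << fully_connected_layer<tan_h>(100, 10);

    // the training helpers put dropout layers into net_phase::train;
    // before running inference the whole network is switched back:
    nn.set_netphase(net_phase::test);
}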
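The serialize / load_and_construct pair is what cereal invokes when the layer is archived. Below is a hedged round-trip sketch, assuming cereal's JSON archives and smart-pointer support (both bundled with tiny_dnn); deserializing through a pointer is what routes cereal to load_and_construct.

#include <memory>
#include <sstream>
#include <cereal/archives/json.hpp>
#include <cereal/types/memory.hpp>
#include "tiny_dnn/tiny_dnn.h"

using namespace tiny_dnn;

int main() {
    std::stringstream ss;
    {
        auto original = std::make_shared<dropout_layer>(100, float_t(0.5));
        cereal::JSONOutputArchive oar(ss);
        oar(original);   // writes "in_size", "dropout_rate" and "phase" via serialize()
    }
    {
        std::shared_ptr<dropout_layer> restored;
        cereal::JSONInputArchive iar(ss);
        iar(restored);   // reads the fields back through load_and_construct()
        // restored->dropout_rate() == 0.5, with the same in_shape() as before
    }
}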