#pragma once

#include "tiny_dnn/util/util.h"
#include "tiny_dnn/util/math_functions.h"
#include "tiny_dnn/layers/layer.h"

#include <string>
#include <vector>

namespace tiny_dnn {

/**
 * Batch Normalization
 *
 * Normalize the activations of the previous layer at each batch.
 **/
class batch_normalization_layer : public layer {
 public:
  typedef layer Base;
  /**
   * @param prev_layer [in] previous layer to be connected with this layer
   * @param epsilon    [in] small positive value added to the variance to avoid division by zero
   * @param momentum   [in] momentum of the exponential moving average of the mean/variance
   * @param phase      [in] specify the current context (train/test)
   **/
  batch_normalization_layer(const layer& prev_layer,
                            float_t epsilon  = 1e-5,
                            float_t momentum = 0.999,
                            net_phase phase  = net_phase::train)
    : Base({vector_type::data}, {vector_type::data}),
      in_channels_(prev_layer.out_shape()[0].depth_),
      in_spatial_size_(prev_layer.out_shape()[0].area()),
      phase_(phase),
      momentum_(momentum),
      eps_(epsilon),
      update_immidiately_(false) {
    init();
  }
  /**
   * @param in_spatial_size [in] spatial size (width x height) of the input data
   * @param in_channels     [in] number of channels of the input data
   * @param epsilon         [in] small positive value added to the variance to avoid division by zero
   * @param momentum        [in] momentum of the exponential moving average of the mean/variance
   * @param phase           [in] specify the current context (train/test)
   **/
  batch_normalization_layer(serial_size_t in_spatial_size,
                            serial_size_t in_channels,
                            float_t epsilon  = 1e-5,
                            float_t momentum = 0.999,
                            net_phase phase  = net_phase::train)
    : Base({vector_type::data}, {vector_type::data}),
      in_channels_(in_channels),
      in_spatial_size_(in_spatial_size),
      phase_(phase),
      momentum_(momentum),
      eps_(epsilon),
      update_immidiately_(false) {
    init();
  }
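  // Usage sketch (illustrative only; "prev_conv" and the 28x28x6 shape are
  // assumptions made up for the example): both constructors describe the same
  // layer, the shape-based overload is simply handier when the preceding
  // layer object is not at hand.
  //
  //   batch_normalization_layer bn1(prev_conv);     // shape inferred from prev_conv
  //   batch_normalization_layer bn2(28 * 28, 6);    // 28x28 spatial size, 6 channels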
  virtual ~batch_normalization_layer() {}

  /// number of incoming connections for each output unit
  serial_size_t fan_in_size() const override { return 1; }

  /// number of outgoing connections for each input unit
  /// (used only for weight/bias initialization)
  serial_size_t fan_out_size() const override { return 1; }

  /// array of input shapes (width x height x depth)
  std::vector<index3d<serial_size_t>> in_shape() const override {
    return { index3d<serial_size_t>(in_spatial_size_, 1, in_channels_) };
  }

  /// array of output shapes (width x height x depth)
  std::vector<index3d<serial_size_t>> out_shape() const override {
    return { index3d<serial_size_t>(in_spatial_size_, 1, in_channels_) };
  }
  /// return delta of the previous layer (delta = dE/da, a = wx in a fully-connected layer)
  void back_propagation(const std::vector<tensor_t*>& in_data,
                        const std::vector<tensor_t*>& out_data,
                        std::vector<tensor_t*>&       out_grad,
                        std::vector<tensor_t*>&       in_grad) override {
    tensor_t&       prev_delta  = *in_grad[0];
    tensor_t&       curr_delta  = *out_grad[0];
    const tensor_t& curr_out    = *out_data[0];
    serial_size_t   num_samples = static_cast<serial_size_t>(curr_out.size());

    CNN_UNREFERENCED_PARAMETER(in_data);

    tensor_t delta_dot_y = curr_out;
    vec_t mean_delta_dot_y, mean_delta, mean_Y;

    for (serial_size_t i = 0; i < num_samples; i++) {
      for (serial_size_t j = 0; j < curr_out[0].size(); j++) {
        delta_dot_y[i][j] *= curr_delta[i][j];
      }
    }
    moments(delta_dot_y, in_spatial_size_, in_channels_, &mean_delta_dot_y, nullptr);
    moments(curr_delta,  in_spatial_size_, in_channels_, &mean_delta,       nullptr);

    // if Y = (X - mean(X)) / sqrt(var(X) + eps), then
    //
    //   dE/dX = (dE/dY - mean(dE/dY) - mean(dE/dY . Y) . Y) / sqrt(var(X) + eps)
    //
    for_i(num_samples, [&](int i) {
      for (serial_size_t j = 0; j < in_channels_; j++) {
        for (serial_size_t k = 0; k < in_spatial_size_; k++) {
          serial_size_t index = j * in_spatial_size_ + k;

          prev_delta[i][index] = curr_delta[i][index] - mean_delta[j]
                                 - mean_delta_dot_y[j] * curr_out[i][index];

          // stddev_ was computed in the preceding forward pass
          prev_delta[i][index] /= stddev_[j];
        }
      }
    });
  }
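  // Note on the backward pass: this layer carries no learnable scale/shift
  // (gamma/beta); it only whitens each channel, so the gradient is just the
  // combination above divided by the per-channel standard deviation. A
  // following layer can provide an affine transform if one is needed.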
  void forward_propagation(const std::vector<tensor_t*>& in_data,
                           std::vector<tensor_t*>&       out_data) override {
    vec_t* mean     = nullptr;
    vec_t* variance = nullptr;
    tensor_t& in  = *in_data[0];
    tensor_t& out = *out_data[0];

    if (phase_ == net_phase::train) {
      // compute the mean/variance of the current mini-batch
      mean     = &mean_current_;
      variance = &variance_current_;
      moments(*in_data[0], in_spatial_size_, in_channels_, mean, variance);
    } else {
      // use the stored running statistics in the test phase
      mean     = &mean_;
      variance = &variance_;
    }

    // y = (x - mean) / sqrt(variance + eps)
    calc_stddev(*variance);

    for_i(in_data[0]->size(), [&](int i) {
      const float_t* inptr  = &in[i][0];
      float_t*       outptr = &out[i][0];

      for (serial_size_t j = 0; j < in_channels_; j++) {
        float_t m = (*mean)[j];

        for (serial_size_t k = 0; k < in_spatial_size_; k++) {
          *outptr++ = (*inptr++ - m) / stddev_[j];
        }
      }
    });

    if (phase_ == net_phase::train && update_immidiately_) {
      mean_     = mean_current_;
      variance_ = variance_current_;
    }
  }
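  // Phase handling: in the train phase the statistics of the current
  // mini-batch are used directly and later folded into mean_/variance_ by
  // post_update(); in the test phase the stored running statistics are used
  // instead. Setting update_immidiately_ copies the batch statistics straight
  // into the running buffers, bypassing the moving average, which can be
  // useful when the statistics should be frozen right away.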
  /// notify changing context (train <=> test)
  void set_context(net_phase ctx) override {
    phase_ = ctx;
  }

  /// name of layer, should be unique for each concrete class
  std::string layer_type() const override { return "batch-norm"; }
  virtual void post_update() override {
    for (serial_size_t i = 0; i < mean_.size(); i++) {
      mean_[i]     = momentum_ * mean_[i]     + (1 - momentum_) * mean_current_[i];
      variance_[i] = momentum_ * variance_[i] + (1 - momentum_) * variance_current_[i];
    }
  }
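  // The running statistics follow an exponential moving average: with the
  // default momentum of 0.999 each update keeps 99.9% of the old estimate,
  // so roughly the last 1 / (1 - momentum) = 1000 mini-batches dominate the
  // values used at test time.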
  virtual void save(std::ostream& os) const override {
    Base::save(os);
    for (auto m : mean_)     os << m << " ";
    for (auto v : variance_) os << v << " ";
  }
  virtual void load(std::istream& is) override {
    Base::load(is);
    for (auto& m : mean_)     is >> m;
    for (auto& v : variance_) is >> v;
  }
  virtual void load(const std::vector<float_t>& src, int& idx) override {
    Base::load(src, idx);
    for (auto& m : mean_)     m = src[idx++];
    for (auto& v : variance_) v = src[idx++];
  }
  void update_immidiately(bool update) {
    update_immidiately_ = update;
  }
  void set_stddev(const vec_t& stddev) {
    stddev_ = stddev;
  }

  void set_mean(const vec_t& mean) {
    mean_ = mean;
  }

  void set_variance(const vec_t& variance) {
    variance_ = variance;
    calc_stddev(variance);
  }
  template <class Archive>
  static void load_and_construct(
      Archive& ar, cereal::construct<batch_normalization_layer>& construct) {
    serial_size_t in_spatial_size, in_channels;
    float_t eps, momentum;
    net_phase phase;
    vec_t mean, variance;

    ar(cereal::make_nvp("in_spatial_size", in_spatial_size),
       cereal::make_nvp("in_channels",     in_channels),
       cereal::make_nvp("epsilon",         eps),
       cereal::make_nvp("momentum",        momentum),
       cereal::make_nvp("phase",           phase),
       cereal::make_nvp("mean",            mean),
       cereal::make_nvp("variance",        variance));
    construct(in_spatial_size, in_channels, eps, momentum, phase);
    construct->set_mean(mean);
    construct->set_variance(variance);
  }
  template <class Archive>
  void serialize(Archive& ar) {
    layer::serialize_prolog(ar);
    ar(cereal::make_nvp("in_spatial_size", in_spatial_size_),
       cereal::make_nvp("in_channels",     in_channels_),
       cereal::make_nvp("epsilon",         eps_),
       cereal::make_nvp("momentum",        momentum_),
       cereal::make_nvp("phase",           phase_),
       cereal::make_nvp("mean",            mean_),
       cereal::make_nvp("variance",        variance_));
  }
  float_t epsilon() const {
    return eps_;
  }

  float_t momentum() const {
    return momentum_;
  }
 private:
  void calc_stddev(const vec_t& variance) {
    for (size_t i = 0; i < in_channels_; i++) {
      stddev_[i] = sqrt(variance[i] + eps_);
    }
  }
  void init() {
    mean_current_.resize(in_channels_);
    mean_.resize(in_channels_);
    variance_current_.resize(in_channels_);
    variance_.resize(in_channels_);
    tmp_mean_.resize(in_channels_);
    stddev_.resize(in_channels_);
  }
  serial_size_t in_channels_;
  serial_size_t in_spatial_size_;

  net_phase phase_;
  float_t momentum_;
  float_t eps_;

  // mean/variance of the current mini-batch
  vec_t mean_current_;
  vec_t variance_current_;

  vec_t tmp_mean_;

  // running (moving-average) mean/variance, used in the test phase
  vec_t mean_;
  vec_t variance_;
  vec_t stddev_;

  // if true, copy the batch statistics into the running buffers immediately
  bool update_immidiately_;
};

}  // namespace tiny_dnn
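// Usage sketch (illustrative; "some_conv_layer" is a placeholder and the exact
// layer names/templates depend on the tiny_dnn version in use):
//
//   network<sequential> net;
//   net << some_conv_layer(/* produces 28x28x6 feature maps */)
//       << batch_normalization_layer(28 * 28, 6);   // whiten each of the 6 channels
//
//   // Training: post_update() maintains the running mean/variance after each
//   // weight update. Before inference, switch the layer to the test phase via
//   // set_context(net_phase::test) (directly or through the enclosing network)
//   // so that forward_propagation() uses the stored statistics.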