tiny_dnn  1.0.0
A header only, dependency-free deep learning framework in C++11
nodes.h
1 /*
2  Copyright (c) 2016, Taiga Nomi
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are met:
7  * Redistributions of source code must retain the above copyright
8  notice, this list of conditions and the following disclaimer.
9  * Redistributions in binary form must reproduce the above copyright
10  notice, this list of conditions and the following disclaimer in the
11  documentation and/or other materials provided with the distribution.
12  * Neither the name of the <organization> nor the
13  names of its contributors may be used to endorse or promote products
14  derived from this software without specific prior written permission.
15 
16  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
17  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
20  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 #pragma once
28 
29 #include <vector>
30 #include <tuple>
31 #include <unordered_map>
32 #include <cereal/types/utility.hpp>
33 #include <cereal/types/tuple.hpp>
34 
35 #include "tiny_dnn/util/util.h"
36 #include "tiny_dnn/layers/layer.h"
37 #include "tiny_dnn/optimizers/optimizer.h"
38 
39 namespace cereal {
40 
41 template <typename Archive>
42 void save(Archive & ar, const std::vector<tiny_dnn::layerptr_t>& v) {
43  ar(cereal::make_size_tag((cereal::size_type)v.size()));
44  for (auto n : v) {
45  tiny_dnn::layer::save_layer(ar, *n);
46  }
47 }
48 
49 
50 template <typename Archive>
51 void load(Archive & ar, std::vector<std::shared_ptr<tiny_dnn::layer>>& v) {
52  cereal::size_type size;
53  ar(cereal::make_size_tag(size));
54 
55  for (size_t i = 0; i < size; i++) {
56  v.emplace_back(tiny_dnn::layer::load_layer(ar));
57  }
58 }
59 
60 }
61 
62 namespace tiny_dnn {
63 
85 class nodes {
86  public:
87  typedef std::vector<layerptr_t>::iterator iterator;
88  typedef std::vector<layerptr_t>::const_iterator const_iterator;
89 
95  virtual
96  void backward(const std::vector<tensor_t>& first) = 0;
97 
102  virtual
103  std::vector<tensor_t> forward(const std::vector<tensor_t>& first) = 0; // NOLINT
104 
108  virtual
109  void update_weights(optimizer *opt, int batch_size) {
110  for (auto l : nodes_) {
111  l->update_weight(opt, batch_size);
112  }
113  }
114 
118  virtual void setup(bool reset_weight) {
119  for (auto l : nodes_) {
120  l->setup(reset_weight);
121  }
122  }
123 
124  void clear_grads() {
125  for (auto l : nodes_) {
126  l->clear_grads();
127  }
128  }
129 
130  size_t size() const { return nodes_.size(); }
131  iterator begin() { return nodes_.begin(); }
132  iterator end() { return nodes_.end(); }
133  const_iterator begin() const { return nodes_.begin(); }
134  const_iterator end() const { return nodes_.end(); }
135  layer* operator[] (size_t index) { return nodes_[index]; }
136  const layer* operator[] (size_t index) const { return nodes_[index]; }
137  serial_size_t in_data_size() const { return nodes_.front()->in_data_size(); }
138  serial_size_t out_data_size() const { return nodes_.back()->out_data_size(); }
139 
140  template <typename T>
141  const T& at(size_t index) const {
142  const T* v = dynamic_cast<const T*>(nodes_[index]);
143  if (v) return *v;
144  throw nn_error("failed to cast");
145  }
146 
147  template <typename T>
148  T& at(size_t index) {
149  T* v = dynamic_cast<T*>(nodes_[index]);
150  if (v) return *v;
151  throw nn_error("failed to cast");
152  }
153 
154  // @todo: multiple output
155  virtual float_t target_value_min(int out_channel = 0) const {
156  CNN_UNREFERENCED_PARAMETER(out_channel);
157  return nodes_.back()->out_value_range().first;
158  }
159 
160  virtual float_t target_value_max(int out_channel = 0) const {
161  CNN_UNREFERENCED_PARAMETER(out_channel);
162  return nodes_.back()->out_value_range().second;
163  }
164 
165  void save(std::ostream& os) const { // NOLINT
166  for (auto& l : nodes_) {
167  l->save(os);
168  }
169  }
170 
171  void load(std::istream& is) { // NOLINT
172  setup(false);
173  for (auto& l : nodes_) {
174  l->load(is);
175  }
176  }
177 
178  virtual void load(const std::vector<float_t>& vec) {
179  int idx = 0;
180  setup(false);
181  for (auto& l : nodes_) {
182  l->load(vec, idx);
183  }
184  }
185 
186  void label2vec(const label_t* t, serial_size_t num, std::vector<vec_t> *vec) const {
187  serial_size_t outdim = out_data_size();
188 
189  vec->reserve(num);
190  for (serial_size_t i = 0; i < num; i++) {
191  assert(t[i] < outdim);
192  vec->emplace_back(outdim, target_value_min());
193  vec->back()[t[i]] = target_value_max();
194  }
195  }
196 
197  template <typename OutputArchive>
198  void save_model(OutputArchive & oa) const;
199 
200  template <typename InputArchive>
201  void load_model(InputArchive & ia);
202 
203 
204  template <typename OutputArchive>
205  void save_weights(OutputArchive & oa) const {
206  for (auto n : nodes_) {
207  oa(*n);
208  }
209  }
210 
211  template <typename InputArchive>
212  void load_weights(InputArchive & ia) {
213  for (auto n : nodes_) {
214  ia(*n);
215  }
216  }
217 
218  protected:
219  template <typename T>
220  void push_back(T&& node) {
221  push_back_impl(std::forward<T>(node),
222  typename std::is_rvalue_reference<decltype(node)>::type()); // NOLINT
223  }
224 
225  template <typename T>
226  void push_back(std::shared_ptr<T> node) {
227  own_nodes_.push_back(node);
228  nodes_.push_back(own_nodes_.back().get());
229  }
230 
231  // transform indexing so that it's more suitable for per-layer operations
232  // input: [sample][channel][feature]
233  // output: [channel][sample][feature]
234  std::vector<tensor_t> reorder_for_layerwise_processing(const std::vector<tensor_t>& input) {
235  const serial_size_t sample_count = static_cast<serial_size_t>(input.size());
236  const serial_size_t channel_count = static_cast<serial_size_t>(input[0].size());
237 
238  // @todo we could perhaps pass pointers to underlying vec_t objects, in order to avoid copying
239  std::vector<tensor_t> output(channel_count, tensor_t(sample_count));
240 
241  for (serial_size_t sample = 0; sample < sample_count; ++sample) {
242  assert(input[sample].size() == channel_count);
243  for (serial_size_t channel = 0; channel < channel_count; ++channel) {
244  output[channel][sample] = input[sample][channel];
245  }
246  }
247 
248  return output;
249  }
250 
251  template <typename T>
252  void push_back_impl(T&& node, std::true_type) { // is_rvalue_reference
253  own_nodes_.push_back(std::make_shared<
254  typename std::remove_reference<T>::type>(std::forward<T>(node)));
255  nodes_.push_back(own_nodes_.back().get());
256  }
257 
258  template <typename T>
259  void push_back_impl(T&& node, std::false_type) {
260  nodes_.push_back(&node);
261  }
262 
263  /* Nodes which this class has ownership */
264  std::vector<std::shared_ptr<layer>> own_nodes_;
265  /* List of all nodes which includes own_nodes */
266  std::vector<layerptr_t> nodes_;
267 };
268 
272 class sequential : public nodes {
273  public:
274  void backward(const std::vector<tensor_t>& first) override {
275 
276  const std::vector<tensor_t> reordered_grad = reorder_for_layerwise_processing(first);
277  assert(reordered_grad.size() == 1);
278 
279  nodes_.back()->set_out_grads({ reordered_grad[0] });
280 
281  for (auto l = nodes_.rbegin(); l != nodes_.rend(); l++) {
282  (*l)->backward();
283  }
284  }
285 
286  std::vector<tensor_t> forward(const std::vector<tensor_t>& first) override {
287 
288  const std::vector<tensor_t> reordered_data = reorder_for_layerwise_processing(first);
289  assert(reordered_data.size() == 1);
290 
291  nodes_.front()->set_in_data({ reordered_data[0] });
292 
293  for (auto l : nodes_) {
294  l->forward();
295  }
296 
297  const std::vector<tensor_t> out = nodes_.back()->output();
298 
299  return normalize_out(out);
300  }
301 
302  template <typename T>
303  void add(T&& layer) {
304  push_back(std::forward<T>(layer));
305 
306  if (nodes_.size() != 1) {
307  auto head = nodes_[nodes_.size()-2];
308  auto tail = nodes_[nodes_.size()-1];
309  connect(head, tail, 0, 0);
310  auto out = head->outputs();
311  auto in = tail->inputs();
312  }
313  check_connectivity();
314  }
315 
316  void check_connectivity() {
317  for (serial_size_t i = 0; i < nodes_.size() - 1; i++) {
318  auto out = nodes_[i]->outputs();
319  auto in = nodes_[i+1]->inputs();
320 
321  if (out[0] != in[0]) {
322  throw nn_error("");
323  }
324  }
325  }
326 
327  template <typename InputArchive>
328  void load_connections(InputArchive& ia) {
329  for (serial_size_t i = 0; i < nodes_.size() - 1; i++) {
330  auto head = nodes_[i];
331  auto tail = nodes_[i + 1];
332  connect(head, tail, 0, 0);
333  }
334  }
335 
336  template <typename OutputArchive>
337  void save_connections(OutputArchive& ) const { }
338 
339 private:
340  friend class nodes;
341 
342  std::vector<tensor_t> normalize_out(const std::vector<tensor_t>& out)
343  {
344  // normalize indexing back to [sample][layer][feature]
345  std::vector<tensor_t> normalized_output;
346 
347  const size_t sample_count = out[0].size();
348  normalized_output.resize(sample_count, tensor_t(1));
349 
350  for (size_t sample = 0; sample < sample_count; ++sample) {
351  normalized_output[sample][0] = out[0][sample];
352  }
353 
354  return normalized_output;
355  }
356 };
357 
/**
 * generic graph network
 *
 * Layers may have multiple inputs and outputs. construct() wires the
 * given layers and stores them topologically sorted, so forward() and
 * backward() can simply iterate the list linearly.
 **/
class graph : public nodes {
 public:
    /// propagate gradient: expects one gradient channel per output layer
    void backward(const std::vector<tensor_t>& out_grad) override {

        serial_size_t output_channel_count = static_cast<serial_size_t>(out_grad[0].size());

        if (output_channel_count != output_layers_.size()) {
            throw nn_error("input size mismatch");
        }

        // [sample][channel][feature] -> [channel][sample][feature]
        const std::vector<tensor_t> reordered_grad = reorder_for_layerwise_processing(out_grad);
        assert(reordered_grad.size() == output_channel_count);

        for (serial_size_t i = 0; i < output_channel_count; i++) {
            output_layers_[i]->set_out_grads({ reordered_grad[i] });
        }

        // nodes_ is topologically sorted, so the reverse order is a valid
        // backward schedule
        for (auto l = nodes_.rbegin(); l != nodes_.rend(); l++) {
            (*l)->backward();
        }
    }

    /// forward pass: expects one data channel per input layer
    std::vector<tensor_t> forward(const std::vector<tensor_t>& in_data) override {

        serial_size_t input_data_channel_count = static_cast<serial_size_t>(in_data[0].size());

        if (input_data_channel_count != input_layers_.size()) {
            throw nn_error("input size mismatch");
        }

        const std::vector<tensor_t> reordered_data = reorder_for_layerwise_processing(in_data);
        assert(reordered_data.size() == input_data_channel_count);

        for (serial_size_t channel_index = 0; channel_index < input_data_channel_count; channel_index++) {
            input_layers_[channel_index]->set_in_data({ reordered_data[channel_index] });
        }

        for (auto l : nodes_) {
            l->forward();
        }
        return merge_outs();
    }

    /**
     * wire the network from its designated input and output layers and
     * store all reachable layers in topological order
     * @param input  layers that receive external data
     * @param output layers whose outputs form the network result
     **/
    void construct(const std::vector<layerptr_t>& input,
                   const std::vector<layerptr_t>& output) {
        std::vector<layerptr_t> sorted;
        std::vector<nodeptr_t> input_nodes(input.begin(), input.end());
        // per-node bitmap of which incoming edges have been "removed"
        std::unordered_map<node*, std::vector<uint8_t>> removed_edge;

        // topological-sorting
        // (repeatedly emit a node whose incoming edges are all removed)
        while (!input_nodes.empty()) {
            sorted.push_back(dynamic_cast<layerptr_t>(input_nodes.back()));
            input_nodes.pop_back();

            layerptr_t curr = sorted.back();
            std::vector<node*> next = curr->next_nodes();

            for (size_t i = 0; i < next.size(); i++) {
                if (!next[i]) continue;
                // remove edge between next[i] and current
                if (removed_edge.find(next[i]) == removed_edge.end()) {
                    removed_edge[next[i]] =
                        std::vector<uint8_t>(next[i]->prev_nodes().size(), 0);
                }

                std::vector<uint8_t>& removed = removed_edge[next[i]];
                removed[find_index(next[i]->prev_nodes(), curr)] = 1;

                // once every predecessor edge is removed, next[i] is ready
                if (std::all_of(removed.begin(), removed.end(), [](uint8_t x) {
                        return x == 1; })) {
                    input_nodes.push_back(next[i]);
                }
            }
        }

        for (auto& n : sorted) {
            nodes_.push_back(n);
        }

        input_layers_ = input;
        output_layers_ = output;

        setup(false);
    }

 private:
    friend class nodes;

    // serializable description of the graph wiring:
    // (head node id, tail node id, head port, tail port) tuples plus the
    // ids of the designated input/output layers
    struct _graph_connection {
        void add_connection(serial_size_t head, serial_size_t tail, serial_size_t head_index, serial_size_t tail_index) {
            if (!is_connected(head, tail, head_index, tail_index)) {
                connections.emplace_back(head, tail, head_index, tail_index);
            }
        }

        bool is_connected(serial_size_t head, serial_size_t tail, serial_size_t head_index, serial_size_t tail_index) const {
            return std::find(connections.begin(),
                             connections.end(),
                             std::make_tuple(head, tail, head_index, tail_index)) != connections.end();
        }

        template <typename Archive>
        void serialize(Archive & ar) {
            ar(CEREAL_NVP(connections), CEREAL_NVP(in_nodes), CEREAL_NVP(out_nodes));
        }

        std::vector<std::tuple<serial_size_t, serial_size_t, serial_size_t, serial_size_t>> connections;
        std::vector<serial_size_t> in_nodes, out_nodes;
    };

    template <typename OutputArchive>
    void save_connections(OutputArchive& oa) const {
        _graph_connection gc;
        std::unordered_map<node*, serial_size_t> node2id;
        serial_size_t idx = 0;

        // assign each node a stable id equal to its position in nodes_
        for (auto n : nodes_) {
            node2id[n] = idx++;
        }
        for (auto l : input_layers_) {
            gc.in_nodes.push_back(node2id[l]);
        }
        for (auto l : output_layers_) {
            gc.out_nodes.push_back(node2id[l]);
        }

        // walk the graph from each input; the node callback is a no-op,
        // only edges are recorded
        for (auto l : input_layers_) {
            graph_traverse(l, [=](layer& l) {}, [&](edge& e) {
                auto next = e.next();
                serial_size_t head_index = e.prev()->next_port(e);

                for (auto n : next) {
                    serial_size_t tail_index = n->prev_port(e);
                    gc.add_connection(node2id[e.prev()], node2id[n], head_index, tail_index);
                }
            });
        }

        oa(cereal::make_nvp("graph", gc));
    }

    template <typename InputArchive>
    void load_connections(InputArchive& ia) {
        _graph_connection gc;
        ia(cereal::make_nvp("graph", gc));

        // ids index into nodes_ in the same order used by save_connections
        for (auto c : gc.connections) {
            serial_size_t head, tail, head_index, tail_index;
            std::tie(head, tail, head_index, tail_index) = c;
            connect(nodes_[head], nodes_[tail], head_index, tail_index);
        }
        for (auto in : gc.in_nodes) {
            input_layers_.push_back(nodes_[in]);
        }
        for (auto out : gc.out_nodes) {
            output_layers_.push_back(nodes_[out]);
        }
    }

    // normalize indexing back to [sample][layer][feature]
    std::vector<tensor_t> merge_outs() {
        std::vector<tensor_t> merged;
        serial_size_t output_channel_count = static_cast<serial_size_t>(output_layers_.size());
        for (serial_size_t output_channel = 0; output_channel < output_channel_count; ++output_channel) {
            std::vector<tensor_t> out = output_layers_[output_channel]->output();

            serial_size_t sample_count = static_cast<serial_size_t>(out[0].size());
            if (output_channel == 0) {
                assert(merged.empty());
                merged.resize(sample_count, tensor_t(output_channel_count));
            }

            assert(merged.size() == sample_count);

            for (serial_size_t sample = 0; sample < sample_count; ++sample) {
                merged[sample][output_channel] = out[0][sample];
            }
        }
        return merged;
    }

    /// position of @p target inside @p nodes; throws nn_error if absent
    serial_size_t find_index(const std::vector<node*>& nodes,
                             layerptr_t target) {
        for (serial_size_t i = 0; i < nodes.size(); i++) {
            if (nodes[i] == static_cast<node*>(&*target)) return i;
        }
        throw nn_error("invalid connection");
    }
    std::vector<layerptr_t> input_layers_;
    std::vector<layerptr_t> output_layers_;
};
553 
554 
555 
556 template <typename OutputArchive>
557 void nodes::save_model(OutputArchive & oa) const {
558  oa(cereal::make_nvp("nodes", nodes_));
559 
560  if (typeid(*this) == typeid(sequential)) {
561  dynamic_cast<const sequential*>(this)->save_connections(oa);
562  }
563  else {
564  dynamic_cast<const graph*>(this)->save_connections(oa);
565  }
566 }
567 
568 template <typename InputArchive>
569 void nodes::load_model(InputArchive & ia) {
570  own_nodes_.clear();
571  nodes_.clear();
572 
573  ia(cereal::make_nvp("nodes", own_nodes_));
574 
575  for (auto& n : own_nodes_) {
576  nodes_.push_back(&*n);
577  }
578 
579  if (typeid(*this) == typeid(sequential)) {
580  dynamic_cast<sequential*>(this)->load_connections(ia);
581  }
582  else {
583  dynamic_cast<graph*>(this)->load_connections(ia);
584  }
585 }
586 
587 
588 } // namespace tiny_dnn
589 
generic graph network
Definition: nodes.h:362
void backward(const std::vector< tensor_t > &out_grad) override
propagate gradient
Definition: nodes.h:364
std::vector< tensor_t > forward(const std::vector< tensor_t > &in_data) override
Definition: nodes.h:384
base class of all kind of NN layers
Definition: layer.h:62
static std::shared_ptr< layer > load_layer(InputArchive &ia)
generate layer from cereal's Archive
Definition: deserialization_helper.h:159
error exception class for tiny-dnn
Definition: nn_error.h:37
basic class of various network types (sequential, multi-in/multi-out).
Definition: nodes.h:85
virtual void backward(const std::vector< tensor_t > &first)=0
propagate gradient
virtual void update_weights(optimizer *opt, int batch_size)
update weights and clear all gradients
Definition: nodes.h:109
virtual std::vector< tensor_t > forward(const std::vector< tensor_t > &first)=0
virtual void setup(bool reset_weight)
setup all weights, must be called before forward/backward
Definition: nodes.h:118
single-input, single-output feedforward network
Definition: nodes.h:272
std::vector< tensor_t > forward(const std::vector< tensor_t > &first) override
Definition: nodes.h:286
void backward(const std::vector< tensor_t > &first) override
propagate gradient
Definition: nodes.h:274
base class of optimizer. usesHessian: true if the optimizer uses the hessian (2nd-order derivative of the loss...
Definition: optimizer.h:37