tiny_dnn  1.0.0
A header-only, dependency-free deep learning framework in C++11
network.h
1 /*
2  Copyright (c) 2013, Taiga Nomi
3  All rights reserved.
4 
5  Redistribution and use in source and binary forms, with or without
6  modification, are permitted provided that the following conditions are met:
7  * Redistributions of source code must retain the above copyright
8  notice, this list of conditions and the following disclaimer.
9  * Redistributions in binary form must reproduce the above copyright
10  notice, this list of conditions and the following disclaimer in the
11  documentation and/or other materials provided with the distribution.
12  * Neither the name of the <organization> nor the
13  names of its contributors may be used to endorse or promote products
14  derived from this software without specific prior written permission.
15 
16  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
17  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
20  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 #pragma once
28 #include <iostream>
29 #include <stdexcept>
30 #include <algorithm>
31 #include <iterator>
32 #include <iomanip>
33 #include <map>
34 #include <set>
35 #include <limits>
36 #include <string>
37 #include <vector>
38 
39 #include "tiny_dnn/nodes.h"
40 #include "tiny_dnn/util/util.h"
41 #include "tiny_dnn/lossfunctions/loss_function.h"
42 #include "tiny_dnn/activations/activation_function.h"
43 
44 namespace tiny_dnn {
45 
46 enum class content_type {
47  weights,
48  model,
49  weights_and_model
50 };
51 
52 enum class file_format {
53  binary,
54  json
55 };
56 
57 struct result {
58  result() : num_success(0), num_total(0) {}
59 
60  float_t accuracy() const {
61  return float_t(num_success * 100.0 / num_total);  // note: meaningful only when num_total > 0
62  }
63 
64  template <typename Char, typename CharTraits>
65  void print_summary(std::basic_ostream<Char, CharTraits>& os) const {
66  os << "accuracy:" << accuracy()
67  << "% (" << num_success << "/"
68  << num_total << ")" << std::endl;
69  }
70 
71  template <typename Char, typename CharTraits>
72  void print_detail(std::basic_ostream<Char, CharTraits>& os) const {
73  print_summary(os);
74  auto all_labels = labels();
75 
76  os << std::setw(5) << "*" << " ";
77  for (auto c : all_labels)
78  os << std::setw(5) << c << " ";
79  os << std::endl;
80 
81  for (auto r : all_labels) {
82  os << std::setw(5) << r << " ";
83  const auto row_iter = confusion_matrix.find(r);
84  for (auto c : all_labels) {
85  int count = 0;
86  if (row_iter != confusion_matrix.end()) {
87  const auto& row = row_iter->second;
88  const auto col_iter = row.find(c);
89  if (col_iter != row.end()) {
90  count = col_iter->second;
91  }
92  }
93  os << std::setw(5) << count << " ";
94  }
95  os << std::endl;
96  }
97  }
98 
99  std::set<label_t> labels() const {
100  std::set<label_t> all_labels;
101  for (auto r : confusion_matrix) {
102  all_labels.insert(r.first);
103  for (auto c : r.second)
104  all_labels.insert(c.first);
105  }
106  return all_labels;
107  }
108 
109  int num_success;
110  int num_total;
111  std::map<label_t, std::map<label_t, int> > confusion_matrix;
112 };
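The result struct accumulates classification outcomes; its confusion_matrix is indexed by predicted label first, then actual label. A minimal usage sketch, assuming a trained network net and MNIST-style test vectors (the data names are illustrative):

    tiny_dnn::result res = net.test(test_images, test_labels);
    res.print_summary(std::cout);            // e.g. "accuracy:98.1% (9810/10000)"
    res.print_detail(std::cout);             // confusion matrix: rows = predicted, cols = actual
    tiny_dnn::float_t acc = res.accuracy();  // percentage of correct predictions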
113 
114 enum grad_check_mode {
115  GRAD_CHECK_ALL,
116  GRAD_CHECK_RANDOM
117 };
118 
119 template <typename NetType>
120 class network;
121 
122 template <typename Layer>
123 network<sequential>& operator << (network<sequential>& n, Layer&& l);
124 
125 void construct_graph(network<graph>& graph,
126  const std::vector<std::shared_ptr<layer>>& inputs,
127  const std::vector<std::shared_ptr<layer>>& outputs);
128 void construct_graph(network<graph>& graph,
129  const std::vector<layer*>& inputs,
130  const std::vector<layer*>& outputs);
166 template<typename NetType>
167 class network {
168  public:
169  typedef typename std::vector<layerptr_t>::iterator iterator;
170  typedef typename std::vector<layerptr_t>::const_iterator const_iterator;
171 
172  explicit network(const std::string& name = "") : name_(name) {}
173 
177  std::string name() const { return name_; }
178 
182  void init_weight() { net_.setup(true); }
183 
187  vec_t predict(const vec_t& in) { return fprop(in); }
188 
192  tensor_t predict(const tensor_t& in) { return fprop(in); }
193 
197  std::vector<tensor_t> predict(const std::vector<tensor_t>& in) { return fprop(in); }
198 
202  float_t predict_max_value(const vec_t& in) {
203  return fprop_max(in);
204  }
205 
209  label_t predict_label(const vec_t& in) {
210  return fprop_max_index(in);
211  }
212 
218  template <typename Range>
219  vec_t predict(const Range& in) {
220  using std::begin; // for ADL
221  using std::end;
222  return predict(vec_t(begin(in), end(in)));
223  }
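A forward-pass sketch covering the predict overloads above, assuming net is a trained classifier (the all-0.5 input is just a placeholder):

    tiny_dnn::vec_t in(net.in_data_size(), tiny_dnn::float_t(0.5));
    tiny_dnn::vec_t out   = net.predict(in);            // full output vector
    tiny_dnn::label_t id  = net.predict_label(in);      // index of the strongest output
    tiny_dnn::float_t top = net.predict_max_value(in);  // value of the strongest output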
224 
225 
245  template <typename Error, typename Optimizer,
246  typename OnBatchEnumerate, typename OnEpochEnumerate>
247  bool train(Optimizer& optimizer,
248  const std::vector<vec_t>& inputs,
249  const std::vector<label_t>& class_labels,
250  size_t batch_size,
251  int epoch,
252  OnBatchEnumerate on_batch_enumerate,
253  OnEpochEnumerate on_epoch_enumerate,
254  const bool reset_weights = false,
255  const int n_threads = CNN_TASK_SIZE,
256  const std::vector<vec_t>& t_cost = std::vector<vec_t>()) {
257  std::vector<tensor_t> input_tensor, output_tensor, t_cost_tensor;
258  normalize_tensor(inputs, input_tensor);
259  normalize_tensor(class_labels, output_tensor);
260  if (!t_cost.empty()) normalize_tensor(t_cost, t_cost_tensor);
261 
262  return fit<Error>(optimizer, input_tensor, output_tensor, batch_size,
263  epoch, on_batch_enumerate, on_epoch_enumerate,
264  reset_weights, n_threads, t_cost_tensor);
265  }
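A training sketch in the style of tiny-dnn's MNIST example; adagrad, mse, progress_display, and timer ship with tiny-dnn, while the data vectors are assumed to be loaded elsewhere:

    tiny_dnn::adagrad optimizer;
    size_t batch_size = 16;
    int epochs = 10;

    tiny_dnn::progress_display disp(train_images.size());
    tiny_dnn::timer t;

    net.train<tiny_dnn::mse>(optimizer, train_images, train_labels,
                             batch_size, epochs,
                             // on_batch_enumerate: advance the progress bar
                             [&]() { disp += batch_size; },
                             // on_epoch_enumerate: report elapsed time, then reset
                             [&]() {
                               std::cout << t.elapsed() << "s elapsed." << std::endl;
                               disp.restart(train_images.size());
                               t.restart();
                             });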
266 
309  template <typename Error, typename Optimizer,
310  typename OnBatchEnumerate, typename OnEpochEnumerate,
311  typename T, typename U>
312  bool fit(Optimizer& optimizer,
313  const std::vector<T>& inputs,
314  const std::vector<U>& desired_outputs,
315  size_t batch_size,
316  int epoch,
317  OnBatchEnumerate on_batch_enumerate,
318  OnEpochEnumerate on_epoch_enumerate,
319  const bool reset_weights = false,
320  const int n_threads = CNN_TASK_SIZE,
321  const std::vector<U>& t_cost = std::vector<U>()) {
322  std::vector<tensor_t> input_tensor, output_tensor, t_cost_tensor;
323  normalize_tensor(inputs, input_tensor);
324  normalize_tensor(desired_outputs, output_tensor);
325  if (!t_cost.empty()) normalize_tensor(t_cost, t_cost_tensor);
326 
327  return fit<Error>(optimizer, input_tensor, output_tensor, batch_size,
328  epoch, on_batch_enumerate, on_epoch_enumerate,
329  reset_weights, n_threads, t_cost_tensor);
330  }
331 
339  template<typename Error, typename Optimizer, typename T, typename U>
340  bool fit(Optimizer& optimizer,
341  const std::vector<T>& inputs,
342  const std::vector<U>& desired_outputs,
343  size_t batch_size = 1,
344  int epoch = 1) {
345  return fit<Error>(optimizer, inputs, desired_outputs,
346  batch_size, epoch, nop, nop);
347  }
348 
356  template<typename Error, typename Optimizer>
357  bool train(Optimizer& optimizer,
358  const std::vector<vec_t>& inputs,
359  const std::vector<label_t>& class_labels,
360  size_t batch_size = 1,
361  int epoch = 1) {
362  return train<Error>(optimizer, inputs, class_labels,
363  batch_size, epoch, nop, nop);
364  }
365 
369  template<typename Error, typename Optimizer>
370  bool train(Optimizer& optimizer,
371  const std::vector<vec_t>& in,
372  const std::vector<vec_t>& t,
373  size_t batch_size = 1,
374  int epoch = 1) {
375  return fit<Error>(optimizer, in, t, batch_size, epoch, nop, nop);
376  }
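For regression, the fit overloads take vec_t targets directly; an XOR-style sketch, assuming net is a small MLP with two inputs and one output:

    std::vector<tiny_dnn::vec_t> X { {0, 0}, {0, 1}, {1, 0}, {1, 1} };
    std::vector<tiny_dnn::vec_t> y { {0}, {1}, {1}, {0} };

    tiny_dnn::gradient_descent opt;
    net.fit<tiny_dnn::mse>(opt, X, y, 4 /* batch_size */, 1000 /* epochs */);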
377 
382  void set_netphase(net_phase phase) {
383  for (auto n : net_) {
384  n->set_context(phase);
385  }
386  }
387 
391  result test(const std::vector<vec_t>& in, const std::vector<label_t>& t) {
392  result test_result;
393  set_netphase(net_phase::test);
394  for (size_t i = 0; i < in.size(); i++) {
395  const label_t predicted = fprop_max_index(in[i]);
396  const label_t actual = t[i];
397 
398  if (predicted == actual) test_result.num_success++;
399  test_result.num_total++;
400  test_result.confusion_matrix[predicted][actual]++;
401  }
402  return test_result;
403  }
404 
408  std::vector<vec_t> test(const std::vector<vec_t>& in) {
409  std::vector<vec_t> test_result(in.size());
410  set_netphase(net_phase::test);
411  for (size_t i = 0; i < in.size(); i++) {
412  test_result[i] = predict(in[i]);
413  }
414  return test_result;
415  }
416 
420  template <typename E>
421  float_t get_loss(const std::vector<vec_t>& in,
422  const std::vector<vec_t>& t) {
423  float_t sum_loss = float_t(0);
424 
425  for (size_t i = 0; i < in.size(); i++) {
426  const vec_t predicted = predict(in[i]);
427  sum_loss += E::f(predicted, t[i]);
428  }
429  return sum_loss;
430  }
431 
435  template <typename E, typename T>
436  float_t get_loss(const std::vector<T>& in, const std::vector<tensor_t>& t) {
437  float_t sum_loss = float_t(0);
438  std::vector<tensor_t> in_tensor;
439  normalize_tensor(in, in_tensor);
440 
441  for (size_t i = 0; i < in.size(); i++) {
442  const tensor_t predicted = predict(in_tensor[i]);
443  for (size_t j = 0; j < predicted.size(); j++) {
444  sum_loss += E::f(predicted[j], t[i][j]);
445  }
446  }
447  return sum_loss;
448  }
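get_loss is convenient for tracking a held-out set between epochs; note that it returns the summed, not averaged, loss. A sketch, assuming valid_images and valid_targets are std::vector<vec_t> loaded elsewhere:

    tiny_dnn::float_t loss = net.get_loss<tiny_dnn::mse>(valid_images, valid_targets);
    std::cout << "mean validation loss: " << loss / valid_images.size() << std::endl;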
449 
455  template <typename E>
456  bool gradient_check(const std::vector<tensor_t>& in,
457  const std::vector<std::vector<label_t>>& t,
458  float_t eps, grad_check_mode mode) {
459  assert(in.size() == t.size());
460 
461  std::vector<tensor_t> v(t.size());
462  const serial_size_t sample_count = static_cast<serial_size_t>(t.size());
463  for (serial_size_t sample = 0; sample < sample_count; ++sample) {
464  net_.label2vec(&t[sample][0], static_cast<serial_size_t>(t[sample].size()), &v[sample]);
465  }
466 
467  for (auto current : net_) { // ignore first input layer
468  if (current->weights().size() < 2) {
469  continue;
470  }
471  vec_t& w = *current->weights()[0];
472  vec_t& b = *current->weights()[1];
473  tensor_t& dw = (*current->weights_grads()[0]);
474  tensor_t& db = (*current->weights_grads()[1]);
475 
476  if (w.empty()) continue;
477 
478  switch (mode) {
479  case GRAD_CHECK_ALL:
480  for (int i = 0; i < static_cast<int>(w.size()); i++)
481  if (!calc_delta<E>(in, v, w, dw, i, eps)) {
482  return false;
483  }
484  for (int i = 0; i < static_cast<int>(b.size()); i++)
485  if (!calc_delta<E>(in, v, b, db, i, eps)) {
486  return false;
487  }
488  break;
489  case GRAD_CHECK_RANDOM:
490  for (int i = 0; i < 10; i++)
491  if (!calc_delta<E>(in, v, w, dw, uniform_idx(w), eps)) {
492  return false;
493  }
494  for (int i = 0; i < 10; i++)
495  if (!calc_delta<E>(in, v, b, db, uniform_idx(b), eps)) {
496  return false;
497  }
498  break;
499  default:
500  throw nn_error("unknown grad-check type");
501  }
502  }
503  return true;
504  }
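gradient_check compares the gradients computed by bprop against numerical estimates (see calc_delta below) and is mainly used by tiny-dnn's own unit tests. A sketch for a tiny classifier with one single-channel sample labeled 1, assuming the weights are already initialized:

    std::vector<tiny_dnn::tensor_t> in {
      { tiny_dnn::vec_t(net.in_data_size(), tiny_dnn::float_t(0.5)) }
    };
    std::vector<std::vector<tiny_dnn::label_t>> t { { 1 } };

    bool ok = net.gradient_check<tiny_dnn::mse>(
      in, t, tiny_dnn::float_t(1e-4), tiny_dnn::GRAD_CHECK_RANDOM);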
505 
509  size_t layer_size() const {
510  return net_.size();
511  }
512 
516  size_t depth() const {
517  return layer_size();
518  }
519 
523  const layer* operator[] (size_t index) const {
524  return net_[index];
525  }
526 
530  layer* operator[] (size_t index) {
531  return net_[index];
532  }
533 
538  template <typename T>
539  const T& at(size_t index) const {
540  return net_.template at<T>(index);
541  }
542 
543  template <typename T>
544  T& at(size_t index) {
545  return net_.template at<T>(index);
546  }
547 
551  serial_size_t out_data_size() const {
552  return net_.out_data_size();
553  }
554 
558  serial_size_t in_data_size() const {
559  return net_.in_data_size();
560  }
561 
565  template <typename WeightInit>
566  network& weight_init(const WeightInit& f) {
567  auto ptr = std::make_shared<WeightInit>(f);
568  for (auto& l : net_)
569  l->weight_init(ptr);
570  return *this;
571  }
572 
576  template <typename BiasInit>
577  network& bias_init(const BiasInit& f) {
578  auto ptr = std::make_shared<BiasInit>(f);
579  for (auto& l : net_)
580  l->bias_init(ptr);
581  return *this;
582  }
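Initializers can be swapped before the weights are (re)built; xavier and constant live in the tiny_dnn::weight_init namespace. A sketch:

    net.weight_init(tiny_dnn::weight_init::xavier())
       .bias_init(tiny_dnn::weight_init::constant(0.0));
    net.init_weight();  // rebuild weights with the new initializers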
583 
587  template <typename T>
588  bool has_same_weights(const network<T>& rhs, float_t eps) const {
589  auto first1 = net_.begin();
590  auto first2 = rhs.net_.begin();
591  auto last1 = net_.end();
592  auto last2 = rhs.net_.end();
593 
594  for (; first1 != last1 && first2 != last2; ++first1, ++first2)
595  if (!(*first1)->has_same_weights(**first2, eps)) return false;
596  return true;
597  }
598 
599  iterator begin() { return net_.begin(); }
600  iterator end() { return net_.end(); }
601  const_iterator begin() const { return net_.begin(); }
602  const_iterator end() const { return net_.end(); }
603 
604  void load(const std::string& filename,
605  content_type what = content_type::weights_and_model,
606  file_format format = file_format::binary) {
607  std::ifstream ifs(filename.c_str(), std::ios::binary | std::ios::in);
608  if (ifs.fail() || ifs.bad())
609  throw nn_error("failed to open:" + filename);
610 
611  switch (format) {
612  case file_format::binary:
613  {
614  cereal::BinaryInputArchive bi(ifs);
615  from_archive(bi, what);
616  }
617  break;
618  case file_format::json:
619  {
620  cereal::JSONInputArchive ji(ifs);
621  from_archive(ji, what);
622  }
623  break;
624  default:
625  throw nn_error("invalid serialization format");
626  }
627  }
628 
629  void save(const std::string& filename,
630  content_type what = content_type::weights_and_model,
631  file_format format = file_format::binary) const {
632  std::ofstream ofs(filename.c_str(), std::ios::binary | std::ios::out);
633  if (ofs.fail() || ofs.bad())
634  throw nn_error("failed to open:" + filename);
635 
636  switch (format) {
637  case file_format::binary:
638  {
639  cereal::BinaryOutputArchive bo(ofs);
640  to_archive(bo, what);
641  }
642  break;
643  case file_format::json:
644  {
645  cereal::JSONOutputArchive jo(ofs);
646  to_archive(jo, what);
647  }
648  break;
649  default:
650  throw nn_error("invalid serialization format");
651  }
652  }
653 
657  std::string to_json() const {
658  std::stringstream ss;
659  {
660  cereal::JSONOutputArchive oa(ss);
661  to_archive(oa, content_type::model);
662  }
663  return ss.str();
664  }
665 
669  void from_json(const std::string& json_string) {
670  std::stringstream ss;
671  ss << json_string;
672  cereal::JSONInputArchive ia(ss);
673  from_archive(ia, content_type::model);
674  }
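Putting the serialization pieces together (cereal is bundled with tiny-dnn; the file names are arbitrary):

    net.save("mynet");                      // binary, weights + model (the defaults)
    net.save("mynet.json",
             tiny_dnn::content_type::weights_and_model,
             tiny_dnn::file_format::json);  // human-readable JSON

    tiny_dnn::network<tiny_dnn::sequential> net2;
    net2.load("mynet");                     // restores architecture and weights

    std::string arch = net.to_json();       // architecture only, no weights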
675 
677  void save(std::ostream& os) const {
678  os.precision(std::numeric_limits<tiny_dnn::float_t>::digits10);
679  net_.save(os);
680  }
681 
683  void load(std::istream& is) {
684  is.precision(std::numeric_limits<tiny_dnn::float_t>::digits10);
685  net_.load(is);
686  }
687 
692  void fast_load(const char* filepath) {
693  FILE* stream = fopen(filepath, "r");
     if (stream == nullptr)  // guard added: reading from a null stream is undefined behavior
     throw nn_error(std::string("failed to open:") + filepath);
694  std::vector<float_t> data;
695  double temp;
696  while (fscanf(stream, "%lf", &temp) > 0)
697  data.push_back(float_t(temp));
698  fclose(stream);
699 
700  net_.load(data);
701  }
702 
703  template <typename OutputArchive>
704  void to_archive(OutputArchive& ar,
705  content_type what = content_type::weights_and_model) const {
706  if (what == content_type::model ||
707  what == content_type::weights_and_model) {
708  net_.save_model(ar);
709  }
710  if (what == content_type::weights ||
711  what == content_type::weights_and_model) {
712  net_.save_weights(ar);
713  }
714  }
715 
716  template <typename InputArchive>
717  void from_archive(InputArchive& ar,
718  content_type what = content_type::weights_and_model) {
719  if (what == content_type::model ||
720  what == content_type::weights_and_model) {
721  net_.load_model(ar);
722  }
723  if (what == content_type::weights ||
724  what == content_type::weights_and_model) {
725  net_.load_weights(ar);
726  }
727  }
728 
729  protected:
730  float_t fprop_max(const vec_t& in, int idx = 0) {
731  const vec_t& prediction = fprop(in, idx);
732  return *std::max_element(std::begin(prediction), std::end(prediction));
733  }
734 
735  label_t fprop_max_index(const vec_t& in) {
736  return label_t(max_index(fprop(in)));
737  }
738 
739  private:
740  template <typename Layer>
741  friend network<sequential>& operator << (network<sequential>& n, Layer&& l);
742 
743  friend void construct_graph(network<graph>& graph,
744  const std::vector<std::shared_ptr<layer>>& inputs,
745  const std::vector<std::shared_ptr<layer>>& outputs);
746 
747  friend void construct_graph(network<graph>& graph,
748  const std::vector<layer*>& inputs,
749  const std::vector<layer*>& outputs);
750 
751  template <typename Error, typename Optimizer,
752  typename OnBatchEnumerate, typename OnEpochEnumerate>
753  bool fit(Optimizer& optimizer,
754  const std::vector<tensor_t>& inputs,
755  const std::vector<tensor_t>& desired_outputs,
756  size_t batch_size,
757  int epoch,
758  OnBatchEnumerate on_batch_enumerate,
759  OnEpochEnumerate on_epoch_enumerate,
760  const bool reset_weights = false,
761  const int n_threads = CNN_TASK_SIZE,
762  const std::vector<tensor_t>& t_cost = std::vector<tensor_t>()) {
763  // check_training_data(in, t);
764  check_target_cost_matrix(desired_outputs, t_cost);
765  set_netphase(net_phase::train);
766  net_.setup(reset_weights);
767 
768  for (auto n : net_)
769  n->set_parallelize(true);
770  optimizer.reset();
771  for (int iter = 0; iter < epoch; iter++) {
772  for (size_t i = 0; i < inputs.size(); i += batch_size) {
773  train_once<Error>(optimizer, &inputs[i], &desired_outputs[i],
774  static_cast<int>(std::min(batch_size, inputs.size() - i)),
775  n_threads,
776  get_target_cost_sample_pointer(t_cost, i));
777  on_batch_enumerate();
778 
779  /* if (i % 100 == 0 && layers_.is_exploded()) {
780  std::cout << "[Warning]Detected infinite value in weight. stop learning." << std::endl;
781  return false;
782  } */
783  }
784  on_epoch_enumerate();
785  }
786  set_netphase(net_phase::test);
787  return true;
788  }
789 
795  template <typename E, typename Optimizer>
796  void train_once(Optimizer& optimizer,
797  const tensor_t* in,
798  const tensor_t* t,
799  int size,
800  const int nbThreads,
801  const tensor_t* t_cost) {
802  if (size == 1) {
803  bprop<E>(fprop(in[0]), t[0], t_cost ? t_cost[0] : tensor_t());
804  net_.update_weights(&optimizer, 1);
805  } else {
806  train_onebatch<E>(optimizer, in, t, size, nbThreads, t_cost);
807  }
808  }
809 
817  template <typename E, typename Optimizer>
818  void train_onebatch(Optimizer& optimizer,
819  const tensor_t* in,
820  const tensor_t* t,
821  int batch_size,
822  const int num_tasks,
823  const tensor_t* t_cost) {
824  std::vector<tensor_t> in_batch(&in[0], &in[0] + batch_size);
825  std::vector<tensor_t> t_batch(&t[0], &t[0] + batch_size);
826  std::vector<tensor_t> t_cost_batch = t_cost
827  ? std::vector<tensor_t>(&t_cost[0], &t_cost[0] + batch_size)
828  : std::vector<tensor_t>();
829 
830  bprop<E>(fprop(in_batch), t_batch, t_cost_batch);
831  net_.update_weights(&optimizer, batch_size);
832  }
833 
834  vec_t fprop(const vec_t& in) {
835  if (in.size() != (size_t)in_data_size())
836  data_mismatch(**net_.begin(), in);
837 #if 0
838  return fprop(std::vector<vec_t>{ in })[0];
839 #else
840  // a workaround to reduce memory consumption by skipping wrapper function
841  std::vector<tensor_t> a(1);
842  a[0].emplace_back(in);
843  return fprop(a)[0][0];
844 #endif
845  }
846 
847  // convenience wrapper for the function below
848  std::vector<vec_t> fprop(const std::vector<vec_t>& in) {
849  return fprop(std::vector<tensor_t>{ in })[0];
850  }
851 
852  std::vector<tensor_t> fprop(const std::vector<tensor_t>& in) {
853  return net_.forward(in);
854  }
855 
856 // template <typename E>
857 // float_t get_loss(const vec_t& out, const vec_t& t) {
858 // assert(out.size() == t.size());
859 // return E::f(out, t);
860 // }
861 
862  template <typename E>
863  bool calc_delta(const std::vector<tensor_t>& in,
864  const std::vector<tensor_t>& v,
865  vec_t& w, tensor_t& dw, int check_index, double eps) {
866  static const float_t delta = std::sqrt(
867  std::numeric_limits<float_t>::epsilon());
868 
869  assert(in.size() == v.size());
870 
871  const serial_size_t sample_count = static_cast<serial_size_t>(in.size());
872 
873  assert(sample_count > 0);
874 
875  // at the moment, channel count must be 1
876  assert(in[0].size() == 1);
877  assert(v[0].size() == 1);
878 
879  // clear previous results, if any
880  for (vec_t& dw_sample : dw) {
881  std::fill(dw_sample.begin(), dw_sample.end(), float_t(0));
882  }
883 
884  // calculate dw/dE by numeric
885  float_t prev_w = w[check_index];
886 
887  float_t f_p = float_t(0);
888  w[check_index] = prev_w + delta;
889  for (serial_size_t i = 0; i < sample_count; i++) {
890  f_p += get_loss<E>(in[i], v[i]);
891  }
892 
893  float_t f_m = float_t(0);
894  w[check_index] = prev_w - delta;
895  for (serial_size_t i = 0; i < sample_count; i++) {
896  f_m += get_loss<E>(in[i], v[i]);
897  }
898 
899  float_t delta_by_numerical = (f_p - f_m) / (float_t(2) * delta);
900  w[check_index] = prev_w;
901 
902  // calculate dw/dE by bprop
903  bprop<E>(fprop(in), v, std::vector<tensor_t>());
904 
905  float_t delta_by_bprop = 0;
906  for (serial_size_t sample = 0; sample < sample_count; ++sample) {
907  delta_by_bprop += dw[sample][check_index];
908  }
909  net_.clear_grads();
910 
911  return std::abs(delta_by_bprop - delta_by_numerical) <= eps;
912  }
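For reference, calc_delta approximates each partial derivative with the symmetric difference quotient summed over all samples, dE/dw_i ~ (E(w_i + delta) - E(w_i - delta)) / (2 * delta) with delta = sqrt(machine epsilon), and the check passes when the analytic (bprop) and numerical values differ by at most eps.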
913 
914  // convenience wrapper for the function below
915  template <typename E>
916  void bprop(const std::vector<vec_t>& out,
917  const std::vector<vec_t>& t, const std::vector<vec_t>& t_cost) {
918  bprop<E>(std::vector<tensor_t>{out},
919  std::vector<tensor_t>{t}, std::vector<tensor_t>{t_cost});
920  }
921 
922  template <typename E>
923  void bprop(const std::vector<tensor_t>& out,
924  const std::vector<tensor_t>& t,
925  const std::vector<tensor_t>& t_cost) {
926  std::vector<tensor_t> delta = gradient<E>(out, t, t_cost);
927  net_.backward(delta);
928  }
929 
930  void check_t(size_t i, label_t t, serial_size_t dim_out) {
931  if (t >= dim_out) {
932  std::ostringstream os;
933  os << format_str("t[%u]=%u, dim(net output)=%u\n", i, t, dim_out);
934  os << "in classification task, dim(net output) ";
935  os << "must be greater than max class id.\n";
936  if (dim_out == 1) {
937  os << "\n(for regression, use vector<vec_t> ";
938  os << "instead of vector<label_t> for training signal)\n";
939  }
940 
941  throw nn_error("output dimension mismatch!\n " + os.str());
942  }
943  }
944 
945  void check_t(size_t i, const vec_t& t, serial_size_t dim_out) {
946  if (t.size() != dim_out) {
947  throw nn_error(format_str(
948  "output dimension mismatch!\n dim(target[%u])=%u, "
949  "dim(network output)=%u", i, t.size(), dim_out));
950  }
951  }
952 
953  template <typename T>
954  void check_training_data(const std::vector<vec_t>& in,
955  const std::vector<T>& t) {
956  serial_size_t dim_in = in_data_size();
957  serial_size_t dim_out = out_data_size();
958 
959  if (in.size() != t.size()) {
960  throw nn_error("size of training data must be equal to label data");
961  }
962 
963  size_t num = in.size();
964 
965  for (size_t i = 0; i < num; i++) {
966  if (in[i].size() != dim_in) {
967  throw nn_error(format_str(
968  "input dimension mismatch!\n dim(data[%u])=%d, "
969  "dim(network input)=%u", i, in[i].size(), dim_in));
970  }
971  check_t(i, t[i], dim_out);
972  }
973  }
974 
975  void check_target_cost_matrix(const std::vector<tensor_t>& t,
976  const std::vector<tensor_t>& t_cost) {
977  if (!t_cost.empty()) {
978  if (t.size() != t_cost.size()) {
979  throw nn_error("if target cost is supplied, "
980  "its length must equal that of target data");
981  }
982 
983  for (size_t i = 0, end = t.size(); i < end; i++) {
984  check_target_cost_element(t[i], t_cost[i]);
985  }
986  }
987  }
988 
989  // regression
990  void check_target_cost_element(const vec_t& t, const vec_t& t_cost) {
991  if (t.size() != t_cost.size()) {
992  throw nn_error("if target cost is supplied for a regression task, "
993  "its shape must be identical to the target data");
994  }
995  }
996  void check_target_cost_element(const tensor_t& t, const tensor_t& t_cost) {
997  if (t.size() != t_cost.size()) {
998  throw nn_error("if target cost is supplied for a regression task, "
999  "its shape must be identical to the target data");
1000  }
1001  for (size_t i = 0; i < t.size(); i++)
1002  check_target_cost_element(t[i], t_cost[i]);
1003  }
1004 
1005  const tensor_t* get_target_cost_sample_pointer(
1006  const std::vector<tensor_t>& t_cost, size_t i) {
1007  if (!t_cost.empty()) {
1008  assert(i < t_cost.size());
1009  return &(t_cost[i]);
1010  } else {
1011  return nullptr;
1012  }
1013  }
1014 
1015  void normalize_tensor(const std::vector<tensor_t>& inputs,
1016  std::vector<tensor_t>& normalized) {
1017  normalized = inputs;
1018  }
1019 
1020  void normalize_tensor(const std::vector<vec_t>& inputs,
1021  std::vector<tensor_t>& normalized) {
1022  normalized.reserve(inputs.size());
1023  for (size_t i = 0; i < inputs.size(); i++)
1024  normalized.emplace_back(tensor_t{ inputs[i] });
1025  }
1026 
1027  void normalize_tensor(const std::vector<label_t>& inputs,
1028  std::vector<tensor_t>& normalized) {
1029  std::vector<vec_t> vec;
1030  normalized.reserve(inputs.size());
1031  net_.label2vec(&inputs[0], static_cast<serial_size_t>(inputs.size()), &vec);
1032  normalize_tensor(vec, normalized);
1033  }
1034 
1035  std::string name_;
1036  NetType net_;
1037 };
1038 
1049 inline std::vector<vec_t> image2vec(const float_t* data,
1050  const unsigned int rows,
1051  const unsigned int cols,
1052  const unsigned int sizepatch,
1053  const unsigned int step = 1) {
1054  assert(step > 0);
1055  std::vector<vec_t> res((cols-sizepatch) * (rows-sizepatch) / (step*step),
1056  vec_t(sizepatch*sizepatch));
1057  for_i((cols-sizepatch)*(rows-sizepatch)/(step*step), [&](int count) {
1058  const int j = step*(count / ((cols-sizepatch)/step));
1059  const int i = step*(count % ((cols-sizepatch)/step));
1060 
1061  // vec_t sample(sizepatch*sizepatch);
1062 
1063  if (i+sizepatch < cols && j+sizepatch < rows) {
1064  for (unsigned int k = 0; k < sizepatch*sizepatch; k++) {
1065  // for_i(sizepatch*sizepatch, [&](int k) {
1066  unsigned int y = k / sizepatch + j;
1067  unsigned int x = k % sizepatch + i;
1068  res[count][k] = data[x+y*cols];
1069  }
1070  //});
1071  // res[count] = (sample);
1072  }
1073  });
1074  return res;
1075 }
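A patch-extraction sketch for image2vec, assuming a 28x28 single-channel image flattened row-major:

    std::vector<tiny_dnn::float_t> img(28 * 28, tiny_dnn::float_t(0));
    // 8x8 patches sampled every 4 pixels
    std::vector<tiny_dnn::vec_t> patches =
        tiny_dnn::image2vec(img.data(), 28, 28, 8, 4);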
1076 
1077 template <typename Layer>
1078 network<sequential>& operator << (network<sequential>& n, Layer&& l) {
1079  n.net_.add(std::forward<Layer>(l));
1080  return n;
1081 }
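This stream-style operator is how sequential models are assembled; a two-layer MLP sketch, using fully_connected_layer and the tan_h activation from tiny-dnn's catalogs:

    using namespace tiny_dnn;
    using namespace tiny_dnn::activation;

    network<sequential> mlp("mlp");
    mlp << fully_connected_layer<tan_h>(2, 8)   // 2 inputs -> 8 hidden units
        << fully_connected_layer<tan_h>(8, 1);  // 8 hidden -> 1 output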
1082 
1083 template <typename NetType, typename Char, typename CharTraits>
1084 std::basic_ostream<Char, CharTraits>& operator << (std::basic_ostream<Char,
1085  CharTraits>& os,
1086  const network<NetType>& n) {
1087  n.save(os);
1088  return os;
1089 }
1090 
1091 template <typename NetType, typename Char, typename CharTraits>
1092 std::basic_istream<Char, CharTraits>& operator >> (std::basic_istream<Char,
1093  CharTraits>& os,
1094  network<NetType>& n) {
1095  n.load(os);
1096  return os;
1097 }
1098 
1099 inline void construct_graph(network<graph>& graph,
1100  const std::vector<layer*>& inputs,
1101  const std::vector<layer*>& outputs) {
1102  graph.net_.construct(inputs, outputs);
1103 }
1104 
1105 inline void construct_graph(network<graph>& graph,
1106  const std::vector<std::shared_ptr<layer>>& inputs,
1107  const std::vector<std::shared_ptr<layer>>& outputs) {
1108  std::vector<layer*> in_ptr, out_ptr;
1109  auto shared2ptr = [](std::shared_ptr<layer> l) { return l.get(); };
1110 
1111  std::transform(inputs.begin(), inputs.end(),
1112  std::back_inserter(in_ptr), shared2ptr);
1113  std::transform(outputs.begin(), outputs.end(),
1114  std::back_inserter(out_ptr), shared2ptr);
1115 
1116  graph.net_.construct(in_ptr, out_ptr);
1117 }
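construct_graph wires free-standing layer nodes into a network<graph>. A minimal sketch chaining two fully connected nodes (tiny-dnn also overloads operator<< between layers to connect nodes; treat the exact wiring here as illustrative):

    using namespace tiny_dnn;
    using namespace tiny_dnn::activation;

    fully_connected_layer<tan_h> fc1(4, 6);
    fully_connected_layer<softmax> fc2(6, 3);
    fc1 << fc2;  // connect fc1's output to fc2's input

    network<graph> net;
    construct_graph(net, { &fc1 }, { &fc2 });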
1118 } // namespace tiny_dnn
Member summaries

Base class of all kinds of NN layers.
Definition: layer.h:62

A model of neural networks in tiny-dnn.
Definition: network.h:167

void set_netphase(net_phase phase)
Sets the network phase to train or test.
Definition: network.h:382

vec_t predict(const vec_t &in)
Executes forward propagation and returns the output.
Definition: network.h:187

const layer * operator[](size_t index) const
Returns a raw pointer to the index-th layer.
Definition: network.h:523

bool has_same_weights(const network< T > &rhs, float_t eps) const
Returns whether two networks have approximately the same weights (within eps).
Definition: network.h:588

bool train(Optimizer &optimizer, const std::vector< vec_t > &inputs, const std::vector< label_t > &class_labels, size_t batch_size, int epoch, OnBatchEnumerate on_batch_enumerate, OnEpochEnumerate on_epoch_enumerate, const bool reset_weights=false, const int n_threads=CNN_TASK_SIZE, const std::vector< vec_t > &t_cost=std::vector< vec_t >())
Trains the network for a fixed number of epochs (classification task).
Definition: network.h:247

void init_weight()
Explicitly initializes the weights of all layers.
Definition: network.h:182

std::string to_json() const
Saves the network architecture as a JSON string.
Definition: network.h:657

size_t depth() const
Definition: network.h:516

vec_t predict(const Range &in)
Executes forward propagation and returns the output.
Definition: network.h:219

network & weight_init(const WeightInit &f)
Sets the weight initializer for all layers.
Definition: network.h:566

std::vector< tensor_t > predict(const std::vector< tensor_t > &in)
Executes forward propagation and returns the output.
Definition: network.h:197

void save(std::ostream &os) const
Definition: network.h:677

result test(const std::vector< vec_t > &in, const std::vector< label_t > &t)
Tests the network and generates a confusion matrix for a classification task.
Definition: network.h:391

float_t get_loss(const std::vector< T > &in, const std::vector< tensor_t > &t)
Calculates the loss value (smaller is better) for a regression task.
Definition: network.h:436

void fast_load(const char *filepath)
Loads network weights from filepath; roughly 30 times faster than stream reading.
Definition: network.h:692

float_t get_loss(const std::vector< vec_t > &in, const std::vector< vec_t > &t)
Calculates the loss value (smaller is better) for a regression task.
Definition: network.h:421

network & bias_init(const BiasInit &f)
Sets the bias initializer for all layers.
Definition: network.h:577

bool train(Optimizer &optimizer, const std::vector< vec_t > &in, const std::vector< vec_t > &t, size_t batch_size=1, int epoch=1)
Definition: network.h:370

std::vector< vec_t > test(const std::vector< vec_t > &in)
Generates an output for each input.
Definition: network.h:408

bool fit(Optimizer &optimizer, const std::vector< T > &inputs, const std::vector< U > &desired_outputs, size_t batch_size=1, int epoch=1)
Definition: network.h:340

label_t predict_label(const vec_t &in)
Executes forward propagation and returns the index of the maximum output.
Definition: network.h:209

bool gradient_check(const std::vector< tensor_t > &in, const std::vector< std::vector< label_t >> &t, float_t eps, grad_check_mode mode)
Checks the gradients calculated by bprop. Details: http://ufldl.stanford.edu/wiki/index....
Definition: network.h:456

tensor_t predict(const tensor_t &in)
Executes forward propagation and returns the output.
Definition: network.h:192

serial_size_t in_data_size() const
Returns the total number of elements in the input data.
Definition: network.h:558

bool train(Optimizer &optimizer, const std::vector< vec_t > &inputs, const std::vector< label_t > &class_labels, size_t batch_size=1, int epoch=1)
Definition: network.h:357

size_t layer_size() const
Returns the number of layers.
Definition: network.h:509

void from_json(const std::string &json_string)
Loads the network architecture from a JSON string.
Definition: network.h:669

float_t predict_max_value(const vec_t &in)
Executes forward propagation and returns the maximum output value.
Definition: network.h:202

bool fit(Optimizer &optimizer, const std::vector< T > &inputs, const std::vector< U > &desired_outputs, size_t batch_size, int epoch, OnBatchEnumerate on_batch_enumerate, OnEpochEnumerate on_epoch_enumerate, const bool reset_weights=false, const int n_threads=CNN_TASK_SIZE, const std::vector< U > &t_cost=std::vector< U >())
Trains the network for a fixed number of epochs to reproduce the desired outputs.
Definition: network.h:312

serial_size_t out_data_size() const
Returns the total number of elements in the output data.
Definition: network.h:551

const T & at(size_t index) const
Returns the index-th layer as T; throws nn_error if that layer cannot be converted to T.
Definition: network.h:539

std::string name() const
Name of the network.
Definition: network.h:177

Error/exception class for tiny-dnn.
Definition: nn_error.h:37

Base class of optimizers; usesHessian is true if the optimizer uses the Hessian (the second-order derivative of the loss function).
Definition: optimizer.h:37

struct result
Definition: network.h:57