tiny_dnn 1.0.0
A header-only, dependency-free deep learning framework in C++11
tensor.h
/*
    COPYRIGHT

    All contributions by Taiga Nomi
    Copyright (c) 2013, Taiga Nomi
    All rights reserved.

    All other contributions:
    Copyright (c) 2013-2016, the respective contributors.
    All rights reserved.

    Each contributor holds copyright over their respective contributions.
    The project versioning (Git) records all such contribution source information.

    LICENSE

    The BSD 3-Clause License


    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice, this
      list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.

    * Neither the name of tiny-dnn nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once

#include <algorithm>   // std::fill, std::generate
#include <array>       // std::array (tensor shape)
#include <cassert>     // assert
#include <cmath>       // sqrt
#include <functional>  // std::multiplies
#include <limits>      // std::numeric_limits (NaN results of checked operations)
#include <memory>      // std::unique_ptr (device buffer)
#include <numeric>     // std::accumulate
#include <ostream>     // std::ostream (operator<<)
#include <vector>

#include "tiny_dnn/core/framework/device.fwd.h"

// This header also relies on tiny_dnn utilities (float_t, nn_error,
// aligned_allocator, for_i) that are declared in other tiny_dnn headers.

#if defined(USE_OPENCL) || defined(USE_CUDA)
#ifdef USE_OPENCL
#include "third_party/CLCudaAPI/clpp11.h"
#else
#include "third_party/CLCudaAPI/cupp11.h"
#endif
#endif

namespace tiny_dnn {

template<typename U = float_t>
class Tensor {
public:
    /*
     * Initializes an empty tensor.
     */
    Tensor() {
        reshape(0, 0, 0, 0);
    }

    /*
     * Creates a tensor of the given dimensions.
     * A tensor holds its data in NxWxHxD order, where:
     *   N is the batch axis
     *   W is the width axis
     *   H is the height axis
     *   D is the depth axis
     *
     * Data is held by a std::vector with 64-byte alignment.
     */
    explicit Tensor(const size_t d0,
                    const size_t d1,
                    const size_t d2,
                    const size_t d3) {
        reshape(d0, d1, d2, d3);
    }

    explicit Tensor(const std::array<size_t, 4>& shape) {
        reshape(shape[0], shape[1], shape[2], shape[3]);
    }

    explicit Tensor(const std::vector<size_t>& shape) {
        assert(shape.size() == 4);
        reshape(shape[0], shape[1], shape[2], shape[3]);
    }
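
    // Usage sketch (illustrative comment, not part of the original header):
    // constructing tensors from explicit dimensions or from a shape container.
    // Assumes float_t is tiny_dnn's default scalar type.
    //
    //   Tensor<float_t> a(2, 3, 4, 1);                          // batch=2, width=3, height=4, depth=1
    //   Tensor<float_t> b(std::array<size_t, 4>{{2, 3, 4, 1}});
    //   Tensor<float_t> c(std::vector<size_t>{2, 3, 4, 1});     // must contain exactly 4 entries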

    ~Tensor() = default;

    Tensor(const Tensor& other) {
        other.fromDevice();
        shape_ = other.shape_;
        host_data_ = other.host_data_;
        data_is_on_host_ = true;
        data_dirty_ = true;
        // device_data_ is intentionally left uninitialized.
    }

    Tensor& operator = (const Tensor& other) {
        other.fromDevice();
        shape_ = other.shape_;
        data_is_on_host_ = true;
        data_dirty_ = true;
        host_data_ = other.host_data_;

        // device_data_ is intentionally left as-is. It is replaced only when the
        // data is next moved to the GPU and the existing buffer cannot hold the
        // new tensor.
        return *this;
    }

#ifdef CNN_USE_DEFAULT_MOVE_CONSTRUCTORS
    Tensor(Tensor&& other) = default;         // move ctor
    Tensor& operator = (Tensor&&) = default;  // move assign
#else
    // for VS2013 we need to manually implement these if we want to have move semantics
    Tensor(Tensor&& other) {
        shape_ = std::move(other.shape_);
        host_data_ = std::move(other.host_data_);
#if defined(USE_OPENCL) || defined(USE_CUDA)
        device_data_ = std::move(other.device_data_);
#endif
        data_is_on_host_ = other.data_is_on_host_;
        data_dirty_ = other.data_dirty_;
    }

    Tensor& operator = (Tensor&& other) {
        shape_ = std::move(other.shape_);
        host_data_ = std::move(other.host_data_);
#if defined(USE_OPENCL) || defined(USE_CUDA)
        device_data_ = std::move(other.device_data_);
#endif
        data_is_on_host_ = other.data_is_on_host_;
        data_dirty_ = other.data_dirty_;
        return *this;
    }
#endif

    // Returns the tensor shape.
    const std::array<size_t, 4>& shape() const { return shape_; }

    // Returns the value at the specified index in the tensor.
    // Checked version (throws nn_error on out-of-range access).
    U& host_at(const size_t d0,
               const size_t d1,
               const size_t d2,
               const size_t d3) {
        return *host_ptr(d0, d1, d2, d3);
    }

    U host_at(const size_t d0,
              const size_t d1,
              const size_t d2,
              const size_t d3) const {
        return *host_ptr(d0, d1, d2, d3);
    }

    // Returns a pointer to the specified index in the tensor.
    // Checked version (throws nn_error on out-of-range access).
    // The linear offset uses strides of W*H*D for the batch index,
    // W*H for depth, W for height, and 1 for width.
    const U* host_ptr(const size_t d0,
                      const size_t d1,
                      const size_t d2,
                      const size_t d3) const {
        if (d0 >= shape_[0] || d1 >= shape_[1] ||
            d2 >= shape_[2] || d3 >= shape_[3]) {
            throw nn_error("Access tensor out of range.");
        }

        return host_data() + (
            shape_[1] * shape_[2] * shape_[3] * d0 +
            shape_[1] * shape_[2] * d3 +
            shape_[1] * d2 +
            d1
        );
    }

    U* host_ptr(const size_t d0,
                const size_t d1,
                const size_t d2,
                const size_t d3) {
        if (d0 >= shape_[0] || d1 >= shape_[1] ||
            d2 >= shape_[2] || d3 >= shape_[3]) {
            throw nn_error("Access tensor out of range.");
        }

        return mutable_host_data() + (
            shape_[1] * shape_[2] * shape_[3] * d0 +
            shape_[1] * shape_[2] * d3 +
            shape_[1] * d2 +
            d1
        );
    }

    // Returns a read-only pointer to the host copy of the data,
    // synchronizing from the device first if necessary.
    const U* host_data() const {
        fromDevice();
        return host_data_.data();
    }

    // Returns a writable pointer to the host copy of the data and marks it
    // dirty so it is re-uploaded before the next device use.
    U* mutable_host_data() {
        fromDevice();
        data_dirty_ = true;
        return host_data_.data();
    }
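
    // Usage sketch (illustrative comment, not part of the original header):
    // element access through the checked accessors and the raw host pointer.
    //
    //   Tensor<float_t> t(1, 3, 3, 1);
    //   t.host_at(0, 1, 2, 0) = float_t(5);   // checked write; throws nn_error when out of range
    //   float_t  v = t.host_at(0, 1, 2, 0);   // checked read
    //   float_t* p = t.mutable_host_data();   // raw pointer to all size() elements
    //   p[0] = float_t(1);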

#if defined(USE_OPENCL) || defined(USE_CUDA)
    // Returns a read-only handle to the device copy of the data,
    // uploading the host data first if it is dirty.
    const void* device_data() const {
        toDevice();
        return (*device_data_)();
    }

    void* mutable_device_data() {
        toDevice();
        data_dirty_ = true;
        return (*device_data_)();
    }
#endif

    // Total number of elements (N * W * H * D).
    size_t size() const {
        return host_data_.size();
    }

    // Sets every element to the given value.
    void fill(U value) {
        data_is_on_host_ = true;
        data_dirty_ = true;
        std::fill(std::begin(host_data_), std::end(host_data_), value);
    }

    // Changes the tensor dimensions; newly added elements are zero-initialized.
    void reshape(const size_t d0,
                 const size_t d1,
                 const size_t d2,
                 const size_t d3) {
        shape_[0] = d0;
        shape_[1] = d1;
        shape_[2] = d2;
        shape_[3] = d3;
        host_data_.resize(calcSize(), U(0));
    }

    void reshape(const std::array<size_t, 4>& sz) {
        shape_ = sz;
        host_data_.resize(calcSize(), U(0));
    }
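
    // Usage sketch (illustrative comment, not part of the original header):
    // resizing and filling a tensor in place.
    //
    //   Tensor<float_t> t;        // empty tensor, shape {0, 0, 0, 0}
    //   t.reshape(4, 2, 2, 3);    // now holds 4*2*2*3 = 48 zero-initialized elements
    //   t.fill(float_t(0.5));     // overwrite every element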

private:
    size_t calcSize() const {
        return std::accumulate(std::begin(shape_), std::end(shape_),
                               size_t(1), std::multiplies<size_t>());
    }

    // Copies the host data to the device buffer if the device copy is stale.
    void toDevice() const {
        if (data_is_on_host_ && data_dirty_) {
#if defined(USE_OPENCL) || defined(USE_CUDA)
            CLCudaAPI::Queue queue = device_->queue();
            if (device_data_ && device_data_->GetSize() >= host_data_.size()) {
                device_data_->Write(queue, host_data_.size(), host_data_.data(), 0);
            } else {
                CLCudaAPI::Context ctx = device_->context();
                device_data_ = make_unique<CLCudaAPI::Buffer<U> >(
                    ctx, queue, host_data_.begin(), host_data_.end());
            }
#endif
            data_is_on_host_ = false;
            data_dirty_ = false;
        }
    }

    // Copies the device data back to the host buffer if the host copy is stale.
    void fromDevice() const {
        if (!data_is_on_host_ && data_dirty_) {
#if defined(USE_OPENCL) || defined(USE_CUDA)
            assert(device_);
            assert(device_data_);
            // const_cast<> avoids making host_data_ entirely mutable.
            device_data_->Read(device_->queue(), host_data_.size(),
                               const_cast<U*>(host_data_.data()));
#endif
            data_is_on_host_ = true;
            data_dirty_ = false;
        }
    }

private:
    /* Dimensions of the tensor:
     * shape_[0]: batch
     * shape_[1]: width
     * shape_[2]: height
     * shape_[3]: depth
     */
    std::array<size_t, 4> shape_;

    /* Tensor data on the host, stored with 64-byte alignment */
    std::vector<U, aligned_allocator<U, 64> > host_data_;

#if defined(USE_OPENCL) || defined(USE_CUDA)
    /* Tensor data on the device. Declared mutable so the lazy allocation in
     * toDevice() const can create the buffer. */
    mutable std::unique_ptr<CLCudaAPI::Buffer<U> > device_data_;
#endif
    // The flags below carry default values so that a default-constructed tensor
    // does not read uninitialized members in toDevice()/fromDevice().
    mutable bool data_is_on_host_ = true;  //< current data is on host if true, on device if false
    mutable bool data_dirty_ = true;       //< set to true if current data might have been modified

    /* Pointer to the current device where the data resides */
    Device* device_ = nullptr;
};

// Overloaded operator to print the Tensor class to the standard output
template<typename T>
inline std::ostream& operator << (std::ostream& os,
                                  const Tensor<T>& tensor) {
    const std::array<size_t, 4>& shape = tensor.shape();
    for (size_t i = 0; i < shape[0]; ++i) {
        os << "-- Batch: " << i << "\n";
        for (size_t j = 0; j < shape[3]; ++j) {
            os << "-- Channel: " << j << "\n";
            os << "-- Data:\n";
            for (size_t k = 0; k < shape[1]; ++k) {
                for (size_t l = 0; l < shape[2]; ++l) {
                    os << " " << tensor.host_at(i, k, l, j) << " ";
                }
                os << ";\n";
            }
        }
    }
    os << "----------------\n"
       << "--> Tensor size: [ "
       << shape[0] << " x " << shape[1] << " x "
       << shape[2] << " x " << shape[3] << " ]\n";
    return os;
}
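
// Usage sketch (illustrative only; this helper is hypothetical and not part of
// tiny_dnn): streams a small tensor through the operator<< defined above.
template<typename T>
inline void print_tensor_example(std::ostream& os) {
    Tensor<T> t(1, 2, 2, 1);  // one 2x2 single-channel sample
    t.fill(T(1));
    os << t;                  // prints batch/channel headers followed by the 2x2 block
}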

// Utilities for element-wise and tensor-scalar/scalar-tensor operations

template<typename TD, typename TS1, typename TS2, typename F>
void binary_tensor_tensor_elementwise_operation(Tensor<TD>& dst,
                                                const Tensor<TS1>& src1,
                                                const Tensor<TS2>& src2,
                                                F f) {
    if (src1.shape() != src2.shape()) {
        throw nn_error("Tensor must have same shape");
    }

    dst.reshape(src1.shape());

    TD* pdst = dst.mutable_host_data();
    const TS1* psrc1 = src1.host_data();
    const TS2* psrc2 = src2.host_data();

    for_i(true, dst.size(), [pdst, psrc1, psrc2, &f](size_t i) {
        pdst[i] = f(psrc1[i], psrc2[i]);
    });
}

template<typename TD, typename TS, typename F>
void unary_tensor_elementwise_operation(Tensor<TD>& dst, const Tensor<TS>& src, F f) {
    dst.reshape(src.shape());

    TD* pdst = dst.mutable_host_data();
    const TS* psrc = src.host_data();

    for_i(true, dst.size(), [pdst, psrc, &f](size_t i) {
        pdst[i] = f(psrc[i]);
    });
}

template<typename TD, typename TS1, typename TS2, typename F>
void binary_tensor_scalar_operation(Tensor<TD>& dst, const Tensor<TS1>& src1, TS2 src2, F f) {
    dst.reshape(src1.shape());

    TD* pdst = dst.mutable_host_data();
    const TS1* psrc1 = src1.host_data();

    for_i(true, dst.size(), [pdst, psrc1, src2, &f](size_t i) {
        pdst[i] = f(psrc1[i], src2);
    });
}

template<typename TD, typename TS1, typename TS2, typename F>
void binary_scalar_tensor_operation(Tensor<TD>& dst, TS1 src1, const Tensor<TS2>& src2, F f) {
    dst.reshape(src2.shape());

    TD* pdst = dst.mutable_host_data();
    const TS2* psrc2 = src2.host_data();

    for_i(true, dst.size(), [pdst, src1, psrc2, &f](size_t i) {
        pdst[i] = f(src1, psrc2[i]);
    });
}
388 
389 // implementation of
390 
391 namespace details {
392  template<typename TS1, typename TS2> auto plus(TS1 s1, TS2 s2) -> decltype(s1 + s2) { return s1 + s2; }
393 
394  template<typename TS1, typename TS2> auto minus(TS1 s1, TS2 s2) -> decltype(s1 - s2) { return s1 - s2; }
395 
396  template<typename TS1, typename TS2> auto multiplies(TS1 s1, TS2 s2) -> decltype(s1 * s2) { return s1 * s2; }
397 
398  template<typename TS1, typename TS2> auto divides_checked(TS1 s1, TS2 s2) -> decltype(s1 / s2) {
399  typedef decltype(s1 / s2) result_type;
400  return (s2 == result_type{}) ? std::numeric_limits<result_type>::quiet_NaN() : s1 / s2;
401  }
402 
403  template<typename TS1, typename TS2> auto divides_unchecked(TS1 s1, TS2 s2) -> decltype(s1 / s2) {
404  return s1 / s2;
405  }
406 
407  template<typename T> T sqrt_checked(T s1) {
408  return (s1 <= T{}) ? std::numeric_limits<T>::quiet_NaN() : sqrt(s1);
409  }
410 
411  // do not inline - this function converts the std::exp overloadeds in a single templated function.
412  template<typename T> T exp(T s1) {
413  return std::exp(s1);
414  }
415 }

template<typename TD, typename TS1, typename TS2>
void layer_add(Tensor<TD>& dst, TS1 src1, const Tensor<TS2>& src2) {
    binary_scalar_tensor_operation(dst, src1, src2, details::plus<TS1, TS2>);
}

template<typename TD, typename TS1, typename TS2>
void layer_add(Tensor<TD>& dst, const Tensor<TS1>& src1, TS2 src2) {
    binary_tensor_scalar_operation(dst, src1, src2, details::plus<TS1, TS2>);
}

template<typename TD, typename TS1, typename TS2>
void layer_add(Tensor<TD>& dst, const Tensor<TS1>& src1, const Tensor<TS2>& src2) {
    binary_tensor_tensor_elementwise_operation(dst, src1, src2, details::plus<TS1, TS2>);
}
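
// Usage sketch (illustrative only; this helper is hypothetical and not part of
// tiny_dnn): the three layer_add overloads cover scalar+tensor, tensor+scalar
// and tensor+tensor addition.
template<typename T>
inline void layer_add_example(Tensor<T>& out, const Tensor<T>& a, const Tensor<T>& b) {
    layer_add(out, a, b);     // out[i] = a[i] + b[i]; throws nn_error if shapes differ
    layer_add(out, a, T(1));  // out[i] = a[i] + 1
    layer_add(out, T(1), b);  // out[i] = 1 + b[i]
}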

template<typename TD, typename TS1, typename TS2>
void layer_sub(Tensor<TD>& dst, TS1 src1, const Tensor<TS2>& src2) {
    binary_scalar_tensor_operation(dst, src1, src2, details::minus<TS1, TS2>);
}

template<typename TD, typename TS1, typename TS2>
void layer_sub(Tensor<TD>& dst, const Tensor<TS1>& src1, TS2 src2) {
    binary_tensor_scalar_operation(dst, src1, src2, details::minus<TS1, TS2>);
}

template<typename TD, typename TS1, typename TS2>
void layer_sub(Tensor<TD>& dst, const Tensor<TS1>& src1, const Tensor<TS2>& src2) {
    binary_tensor_tensor_elementwise_operation(dst, src1, src2, details::minus<TS1, TS2>);
}

template<typename TD, typename TS1, typename TS2>
void layer_mul(Tensor<TD>& dst, TS1 src1, const Tensor<TS2>& src2) {
    binary_scalar_tensor_operation(dst, src1, src2, details::multiplies<TS1, TS2>);
}

template<typename TD, typename TS1, typename TS2>
void layer_mul(Tensor<TD>& dst, const Tensor<TS1>& src1, TS2 src2) {
    binary_tensor_scalar_operation(dst, src1, src2, details::multiplies<TS1, TS2>);
}

template<typename TD, typename TS1, typename TS2>
void layer_mul(Tensor<TD>& dst, const Tensor<TS1>& src1, const Tensor<TS2>& src2) {
    binary_tensor_tensor_elementwise_operation(dst, src1, src2, details::multiplies<TS1, TS2>);
}

template<typename TD, typename TS1, typename TS2>
void layer_div(Tensor<TD>& dst, TS1 src1, const Tensor<TS2>& src2) {
    binary_scalar_tensor_operation(dst, src1, src2, details::divides_checked<TS1, TS2>);
}

template<typename TD, typename TS1, typename TS2>
void layer_div(Tensor<TD>& dst, const Tensor<TS1>& src1, TS2 src2) {
    if (src2 == TS2(0.0)) {
        dst.reshape(src1.shape());
        dst.fill(std::numeric_limits<TD>::quiet_NaN());
    } else {
        binary_tensor_scalar_operation(dst, src1, src2, details::divides_unchecked<TS1, TS2>);
    }
}

template<typename TD, typename TS1, typename TS2>
void layer_div(Tensor<TD>& dst, const Tensor<TS1>& src1, const Tensor<TS2>& src2) {
    binary_tensor_tensor_elementwise_operation(dst, src1, src2, details::divides_checked<TS1, TS2>);
}
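
// Usage sketch (illustrative only; this helper is hypothetical and not part of
// tiny_dnn): element-wise division maps division by zero to NaN rather than
// raising a floating-point error.
template<typename T>
inline void layer_div_example(Tensor<T>& out, const Tensor<T>& a, const Tensor<T>& b) {
    layer_div(out, a, b);     // out[i] = a[i] / b[i], NaN where b[i] == 0
    layer_div(out, a, T(0));  // entire result filled with NaN
}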

template<typename TD, typename TS>
void layer_sqrt(Tensor<TD>& dst, const Tensor<TS>& src1) {
    unary_tensor_elementwise_operation(dst, src1, details::sqrt_checked<TS>);
}

template<typename TD, typename TS>
void layer_exp(Tensor<TD>& dst, const Tensor<TS>& src1) {
    unary_tensor_elementwise_operation(dst, src1, details::exp<TS>);
}
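
// Usage sketch (illustrative only; this helper is hypothetical and not part of
// tiny_dnn): unary element-wise transforms. Non-positive inputs to layer_sqrt
// come out as NaN because of the checked sqrt above.
template<typename T>
inline void layer_unary_example(Tensor<T>& out, const Tensor<T>& a) {
    layer_sqrt(out, a);  // out[i] = sqrt(a[i]), NaN for a[i] <= 0
    layer_exp(out, a);   // out[i] = exp(a[i])
}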

}  // namespace tiny_dnn