tiny_dnn  1.0.0
A header-only, dependency-free deep learning framework in C++11
conv2d_op_libdnn.h
/*
    COPYRIGHT

    All contributions by Taiga Nomi
    Copyright (c) 2013, Taiga Nomi
    All rights reserved.

    All other contributions:
    Copyright (c) 2013-2016, the respective contributors.
    All rights reserved.

    Each contributor holds copyright over their respective contributions.
    The project versioning (Git) records all such contribution source information.

    LICENSE

    The BSD 3-Clause License


    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice, this
      list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.

    * Neither the name of tiny-cnn nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once

#include "tiny_dnn/core/framework/op_kernel.h"

#ifdef CNN_USE_LIBDNN
#include "libdnn.hpp"
#endif

namespace tiny_dnn {

class Conv2dLibDNNForwardOp : public core::OpKernel {
 public:
    explicit Conv2dLibDNNForwardOp(const core::OpKernelConstruction& context)
        : core::OpKernel(context)
#ifdef CNN_USE_LIBDNN
        , initialized_(false)
#endif
    {
        // TODO(edgar): remove this if statement once the init_backend()
        // routine is refactored at the layer level.
        if (OpKernel::device_ != nullptr) {
            auto params = OpKernel::params_->conv();
            init_libdnn(OpKernel::device_, params);
        }
    }

    void compute(const core::OpKernelContext& context) override {
#ifdef CNN_USE_LIBDNN
        // incoming/outgoing data
        const tensor_t& in_data = context.input(0);
        const tensor_t& W = context.input(1);
        const tensor_t& bias = context.input(2);
        tensor_t& out_data = context.output(1);

        // retrieve the convolutional parameters and pad input
        // Conv2d::setParams(context.params());

        // initialize outputs
        fill_tensor(out_data, float_t(0));

        // retrieve device context and queue
        CLCudaAPI::Context ctx = OpKernel::device_->context();
        CLCudaAPI::Queue queue = OpKernel::device_->queue();

        for (serial_size_t i = 0; i < in_data.size(); ++i) {
            // allocate data to GPU
            auto dev_in = CLCudaAPI::Buffer<float_t>(ctx, queue,
                in_data[i].begin(), in_data[i].end());

            auto dev_W = CLCudaAPI::Buffer<float_t>(ctx, queue,
                W[0].begin(), W[0].end());

            auto dev_bias = CLCudaAPI::Buffer<float_t>(ctx, queue,
                bias[0].begin(), bias[0].end());

            auto dev_out = CLCudaAPI::Buffer<float_t>(ctx, queue,
                out_data[i].begin(), out_data[i].end());

            // cast data types and call libdnn

            // TODO(edgar): set a global variable with the batch size or
            // embed this inside the next-gen Tensor class.
            const int batch_size = 1;

            const float_t* input_ptr = double_cast(dev_in());
            const float_t* weights_ptr = double_cast(dev_W());
            const float_t* bias_ptr = double_cast(dev_bias());

            float_t* output_ptr = mutable_double_cast(dev_out());

            // first time, tune the kernel

            // TODO(edgar/naibaf): enable when second generation
            // kernels are available
            if (!initialized_) {
                /*kernel_->Tune(const_cast<float_t*>(output_ptr), nullptr,
                                const_cast<float_t*>(weights_ptr), nullptr,
                                const_cast<float_t*>(bias_ptr), nullptr,
                                const_cast<float_t*>(input_ptr), nullptr,
                                batch_size);
                initialized_ = true;*/
            }

            // call libdnn forward
            kernel_->Forward(input_ptr,
                             weights_ptr,
                             bias_ptr,
                             output_ptr,
                             batch_size);

            // Copy data GPU -> CPU
            /*std::vector<float_t> dev_W_shadow(W.size(), 0);
            dev_W.Read(queue, W.size(), dev_W_shadow);

            // FOR DEBUG ONLY
            nn_warn("W kernel");
            for (serial_size_t j = 0; j < W.size(); ++j) {
                std::cout << dev_W_shadow[j] << " ";
            }
            std::cout << std::endl;

            // Copy data GPU -> CPU
            std::vector<float_t> dev_in_shadow(in_data_padded[i].size(), 0);
            dev_in.Read(queue, in_data_padded[i].size(), dev_in_shadow);

            // FOR DEBUG ONLY
            nn_warn("input kernel");
            for (serial_size_t j = 0; j < in_data_padded[i].size(); ++j) {
                std::cout << dev_in_shadow[j] << " ";
            }
            std::cout << std::endl;*/

            // Copy data GPU -> CPU
            // TODO(edgar): trigger this only when it is needed
            std::vector<float_t> out(out_data[i].size(), 0);
            dev_out.Read(queue, out_data[i].size(), out);

            /*
            // FOR DEBUG ONLY
            nn_warn("output kernel");
            for (serial_size_t j = 0; j < out.size(); ++j) {
                std::cout << out[j] << " ";
            }
            std::cout << std::endl;
            */

            // copy data to be activated
            std::copy(std::begin(out), std::end(out), std::begin(out_data[i]));
        }

#else
        throw nn_error("TinyDNN was not compiled with LibDNN support.");
#endif
    }

 private:
#ifdef CNN_USE_LIBDNN
    // libdnn takes device memory as typed pointers; these helpers reinterpret
    // the raw cl_mem handle accordingly (no host copy is made).
    float_t* mutable_double_cast(const cl_mem cl_mem_gpu) {
        return static_cast<float_t*>(
            reinterpret_cast<void*>(cl_mem_gpu));
    }

    const float_t* double_cast(const cl_mem cl_mem_gpu) {
        return reinterpret_cast<const float_t*>(
            reinterpret_cast<const void*>(cl_mem_gpu));
    }
#endif

    void init_libdnn(const Device* device, const core::conv_params& params) {
#ifdef CNN_USE_LIBDNN
        assert(device != nullptr);

        // Context needs to be initialized with one device and queue
        greentea::device::setupViennaCLContext(device->deviceId(),
            device->context()(), device->device()(), device->queue()());

        dev_ptr_ = std::make_shared<greentea::device>(
            device->deviceId(),
            device->deviceId(), /* list_id, */
            // TODO(edgar): refactor this since it's possible
            // to have both OpenCL and CUDA.
#if defined(USE_OPENCL)
            greentea::Backend::BACKEND_OpenCL
#elif defined(USE_CUDA)
            greentea::Backend::BACKEND_CUDA
#else
            greentea::Backend::BACKEND_CPU
#endif
        );

        // Initialize device pointer in libdnn
        dev_ptr_->Init();

        // Setup libdnn params
        greentea::LibDNNConfig config;

        config.dev_ptr = dev_ptr_.get();

        // NCHW shape setups

        // total padding per dimension (int32_t avoids a narrowing
        // conversion in the brace-initialization of `pad` below)
        const int32_t dy = static_cast<int32_t>(
            params.in_padded.height_ - params.in.height_);
        const int32_t dx = static_cast<int32_t>(
            params.in_padded.width_ - params.in.width_);

        std::vector<int32_t> in_shape = {
            1,
            params.in.depth_,
            params.in.height_,
            params.in.width_
        };

        std::vector<int32_t> out_shape = {
            1,
            params.out.depth_,
            params.out.height_,
            params.out.width_
        };

        std::vector<int32_t> kernel = {
            params.weight.height_,
            params.weight.width_
        };

        std::vector<int32_t> pad = { dy/2, dx/2 };

        std::vector<int32_t> stride = {
            params.h_stride,
            params.w_stride
        };

        std::vector<int32_t> dilation = { 1, 1 };

        config.in_shape = in_shape;
        config.out_shape = out_shape;
        config.pad = pad;
        config.kernel = kernel;
        config.stride = stride;
        config.dilation = dilation;
        config.group = 1;

        config.bias_term = params.has_bias;

        // Disables some optimizations but may give more stable results
        config.fast_unsafe_math = false;
        // Disables backward pass of weights during kernel.Backward();
        config.weights_backward = false;
        // Disables backward pass for bias during kernel.Backward();
        config.bias_backward = false;

        // (Disabling bias and weight backward pass only propagates
        //  the data gradient (error))

        if (std::is_same<float_t, float>::value ||
            dev_ptr_->CheckCapability("cl_khr_int64_base_atomics")) {
            config.wgalgo = greentea::LIBDNN_CONVOLUTION_WG_ALGO_ATOMIC;
            config.bwalgo = greentea::LIBDNN_CONVOLUTION_BW_ALGO_COL2IM_ATOMIC;
        } else {
            config.wgalgo = greentea::LIBDNN_CONVOLUTION_WG_ALGO_DIRECT;
            config.bwalgo = greentea::LIBDNN_CONVOLUTION_BW_ALGO_IM2COL;
        }

        // generate sources and compile kernel
        kernel_.reset(new greentea::LibDNNConv<float_t>(config));
#endif
    }

 private:
#ifdef CNN_USE_LIBDNN
    std::shared_ptr<greentea::device> dev_ptr_;
    std::shared_ptr<greentea::LibDNNConv<float_t> > kernel_;
    bool initialized_;
#endif
};

class Conv2dLibDNNBackwardOp : public core::OpKernel {
 public:
    explicit Conv2dLibDNNBackwardOp(const core::OpKernelConstruction& context)
        : core::OpKernel(context) {}

    void compute(const core::OpKernelContext& context) override {
        throw nn_error("Not implemented yet.");
    }
};

} // namespace tiny_dnn
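
For context, here is a minimal usage sketch (not part of the header above) of how this kernel is typically reached: a convolutional layer is built with the LibDNN backend and bound to an OpenCL device, after which its forward pass is dispatched to Conv2dLibDNNForwardOp. The exact convolutional_layer constructor signature, the activation template parameter, and the Device/registerOp calls below are assumptions that vary between tiny-dnn revisions; check your version's headers before relying on them.

// Sketch only: assumes tiny-dnn was built with CNN_USE_LIBDNN and USE_OPENCL,
// and that the layer constructor / Device API shown here matches your revision.
#include "tiny_dnn/tiny_dnn.h"

int main() {
    using namespace tiny_dnn;
    using namespace tiny_dnn::activation;

    network<sequential> net;

    // 32x32 single-channel input, 5x5 window, 6 output maps; the last
    // argument selects the LibDNN backend instead of the plain C++ kernel.
    net << convolutional_layer<tan_h>(32, 32, 5, 1, 6,
                                      padding::valid, true, 1, 1,
                                      core::backend_t::libdnn);

    // Register a GPU device (platform 0, device 0) for the layer so that
    // OpKernel::device_ is non-null and init_libdnn() compiles the kernel.
    Device gpu(device_t::GPU, 0, 0);
    gpu.registerOp(*net[0]);

    // ... training / inference as usual ...
    return 0;
}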