#include "tiny_dnn/core/framework/op_kernel.h"
// set up LibDNN only when a compute device is attached to this kernel
if (OpKernel::device_ != nullptr) {
  auto params = OpKernel::params_->conv();
  init_libdnn(OpKernel::device_, params);
}
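// Usage note (not part of this header): OpKernel::device_ is non-null only
// when a compute device has been attached to the layer beforehand. A minimal
// sketch, assuming tiny-dnn's Device API (device_t, Device::registerOp):
//
//   tiny_dnn::Device gpu(tiny_dnn::device_t::GPU,
//                        /* platform_id = */ 0, /* device_id = */ 0);
//   gpu.registerOp(conv_layer);  // hypothetical layer instance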
// incoming/outgoing data
const tensor_t& in_data = context.input(0);
const tensor_t& W       = context.input(1);
const tensor_t& bias    = context.input(2);
tensor_t& out_data      = context.output(1);

// initialize outputs with zeros
fill_tensor(out_data, float_t(0));
// retrieve the device context and command queue
CLCudaAPI::Context ctx = OpKernel::device_->context();
CLCudaAPI::Queue queue = OpKernel::device_->queue();

// process the batch one sample at a time
for (serial_size_t i = 0; i < in_data.size(); ++i) {
  // upload input, weights, bias and current output to device buffers
  auto dev_in = CLCudaAPI::Buffer<float_t>(ctx, queue, in_data[i].begin(),
                                           in_data[i].end());
  auto dev_W = CLCudaAPI::Buffer<float_t>(ctx, queue, W[0].begin(),
                                          W[0].end());
  auto dev_bias = CLCudaAPI::Buffer<float_t>(ctx, queue, bias[0].begin(),
                                             bias[0].end());
  auto dev_out = CLCudaAPI::Buffer<float_t>(ctx, queue, out_data[i].begin(),
                                            out_data[i].end());
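  // Each CLCudaAPI::Buffer constructed above copies its host range into a
  // fresh device allocation. A minimal round trip using only the calls seen
  // in this file (a sketch, not part of the kernel):
  //
  //   std::vector<float_t> host = {1, 2, 3};
  //   auto dev = CLCudaAPI::Buffer<float_t>(ctx, queue,
  //                                         host.begin(), host.end());
  //   std::vector<float_t> back(host.size(), 0);
  //   dev.Read(queue, back.size(), back);  // back now equals host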
  // each sample is forwarded individually
  const int batch_size = 1;

  const float_t* input_ptr   = double_cast(dev_in());
  const float_t* weights_ptr = double_cast(dev_W());
  const float_t* bias_ptr    = double_cast(dev_bias());
  float_t* output_ptr        = mutable_double_cast(dev_out());
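  // dev_in() and friends return the underlying cl_mem handle; double_cast /
  // mutable_double_cast (defined further below) merely re-label that opaque
  // handle as a float_t* to fit LibDNN's pointer-based interface. It refers
  // to device memory and must not be dereferenced on the host.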
  // run LibDNN's generated forward convolution kernel
  kernel_->Forward(input_ptr, weights_ptr, bias_ptr, output_ptr, batch_size);
  // read the result back and store it in the output tensor
  std::vector<float_t> out(out_data[i].size(), 0);
  dev_out.Read(queue, out_data[i].size(), out);
  std::copy(std::begin(out), std::end(out), std::begin(out_data[i]));
}
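// Note: each loop iteration pays for full host<->device transfers (the four
// buffer constructions plus the Read above), so samples are both uploaded
// and downloaded one at a time.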
// fallback when tiny-dnn is built without LibDNN
throw nn_error("TinyDNN was not compiled with LibDNN support.");
#ifdef CNN_USE_LIBDNN
float_t* mutable_double_cast(const cl_mem cl_mem_gpu) {
  return static_cast<float_t*>(reinterpret_cast<void*>(cl_mem_gpu));
}

const float_t* double_cast(const cl_mem cl_mem_gpu) {
  return reinterpret_cast<const float_t*>(
    reinterpret_cast<const void*>(cl_mem_gpu));
}
#endif
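// These helpers convert no data: they re-label the opaque cl_mem handle so
// it can pass through LibDNN's float_t* parameters. Despite the "double"
// in their names, they work on whatever float_t is compiled as (float or
// double).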
#ifdef CNN_USE_LIBDNN
assert(device != nullptr);

// bind the greentea/ViennaCL context to this device, context and queue
greentea::device::setupViennaCLContext(device->deviceId(), device->context()(),
                                       device->device()(), device->queue()());
dev_ptr_ = std::make_shared<greentea::device>(
  device->deviceId(), device->deviceId(),
#if defined(USE_OPENCL)
  greentea::Backend::BACKEND_OpenCL
#elif defined(USE_CUDA)
  greentea::Backend::BACKEND_CUDA
#else
  greentea::Backend::BACKEND_CPU
#endif
);
// populate the LibDNN convolution descriptor
greentea::LibDNNConfig config;
config.dev_ptr = dev_ptr_.get();
// tiny-dnn keeps padding implicit; recover the total padding from the
// difference between the padded and unpadded input shapes
const float_t dy = params.in_padded.height_ - params.in.height_;
const float_t dx = params.in_padded.width_ - params.in.width_;
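// Worked example: with a 5x5 kernel and same-padding, in_padded.height_ =
// in.height_ + 4, so dy = 4 and the per-side padding built below is
// pad = {dy / 2, dx / 2} = {2, 2}.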
std::vector<int32_t> in_shape = {1, static_cast<int32_t>(params.in.depth_),
                                 static_cast<int32_t>(params.in.height_),
                                 static_cast<int32_t>(params.in.width_)};
std::vector<int32_t> out_shape = {1, static_cast<int32_t>(params.out.depth_),
                                  static_cast<int32_t>(params.out.height_),
                                  static_cast<int32_t>(params.out.width_)};
std::vector<int32_t> kernel = {static_cast<int32_t>(params.weight.height_),
                               static_cast<int32_t>(params.weight.width_)};
std::vector<int32_t> pad = {static_cast<int32_t>(dy / 2),
                            static_cast<int32_t>(dx / 2)};
std::vector<int32_t> stride = {static_cast<int32_t>(params.h_stride),
                               static_cast<int32_t>(params.w_stride)};
std::vector<int32_t> dilation = {1, 1};
config.in_shape = in_shape;
config.out_shape = out_shape;
config.pad = pad;
config.kernel = kernel;
config.stride = stride;
config.dilation = dilation;
config.bias_term = params.has_bias;

// forward-only kernel: keep math exact and skip the gradient kernels
config.fast_unsafe_math = false;
config.weights_backward = false;
config.bias_backward = false;
if (std::is_same<float_t, float>::value ||
    dev_ptr_->CheckCapability("cl_khr_int64_base_atomics")) {
  config.wgalgo = greentea::LIBDNN_CONVOLUTION_WG_ALGO_ATOMIC;
  config.bwalgo = greentea::LIBDNN_CONVOLUTION_BW_ALGO_COL2IM_ATOMIC;
} else {
  config.wgalgo = greentea::LIBDNN_CONVOLUTION_WG_ALGO_DIRECT;
  config.bwalgo = greentea::LIBDNN_CONVOLUTION_BW_ALGO_IM2COL;
}
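// Rationale for the capability check: the atomic variants accumulate
// partial results with atomic adds. An atomic add on double is typically
// emulated with a 64-bit integer compare-and-swap loop, which on OpenCL
// requires the cl_khr_int64_base_atomics extension; when float_t is float
// (or the extension is present) the faster atomic paths are safe to use.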
// build the LibDNN convolution kernel for this configuration
kernel_.reset(new greentea::LibDNNConv<float_t>(config));
#ifdef CNN_USE_LIBDNN
std::shared_ptr<greentea::device> dev_ptr_;
std::shared_ptr<greentea::LibDNNConv<float_t>> kernel_;
#endif
throw nn_error("Not implemented yet.");