tiny_dnn  1.0.0
A header-only, dependency-free deep learning framework in C++11
conv2d_op_opencl.h
/*
    COPYRIGHT

    All contributions by Taiga Nomi
    Copyright (c) 2013, Taiga Nomi
    All rights reserved.

    All other contributions:
    Copyright (c) 2013-2016, the respective contributors.
    All rights reserved.

    Each contributor holds copyright over their respective contributions.
    The project versioning (Git) records all such contribution source information.

    LICENSE

    The BSD 3-Clause License

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice, this
      list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.

    * Neither the name of tiny-cnn nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once

#include "tiny_dnn/core/framework/op_kernel.h"

namespace tiny_dnn {

class Conv2dOpenCLForwardOp : public core::OpKernel {
 public:
  explicit Conv2dOpenCLForwardOp(const core::OpKernelConstruction& context)
    : core::OpKernel(context) {}

  void compute(const core::OpKernelContext& context) override {
#if defined(USE_OPENCL) || defined(USE_CUDA)
    auto params = OpKernel::params_->conv();

    // incoming/outgoing data
    const tensor_t& in_data = context.input(0);
    const tensor_t& W       = context.input(1);
    const tensor_t& bias    = context.input(2);
    tensor_t& out_data      = context.output(1);

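    // Note: output index 1 is assumed to address the layer's pre-activation
    // tensor in this version of tiny_dnn (a layer's outputs being {out, a});
    // the activation itself is applied elsewhere.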
    // initialize outputs
    fill_tensor(out_data, float_t(0));

    // retrieve the compiled program from the registry
    CLCudaAPI::Program program = ProgramManager::getInstance()
        .program(Program(context.device(), context.Layer()));
    nn_warn("Got Program");

    // Creates the kernel from the compiled program. Its arguments are set
    // inside the loop below; note that the argument indices must match the
    // parameter order in the kernel signature.
    auto kernel = CLCudaAPI::Kernel(program, "CFMulti");
    nn_warn("Got Kernel");

    tiny_dnn::Device* device = context.device();
    CLCudaAPI::Context ctx   = context.device()->context();
    CLCudaAPI::Queue queue   = context.device()->queue();

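    // CLCudaAPI exposes one host-side API over both an OpenCL and a CUDA
    // back end, which is why the guard above accepts USE_OPENCL || USE_CUDA.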
    // TODO(edgar): check if we really need that
    for (serial_size_t i = 0; i < in_data.size(); ++i) {
      // Creates device buffers and copies the host data to these
      // device buffers.
      auto dev_in = CLCudaAPI::Buffer<float_t>(ctx, queue,
                                               in_data[i].begin(),
                                               in_data[i].end());

      auto dev_W = CLCudaAPI::Buffer<float_t>(ctx, queue,
                                              W[0].begin(), W[0].end());

      auto dev_bias = CLCudaAPI::Buffer<float_t>(ctx, queue,
                                                 bias[0].begin(), bias[0].end());

      auto dev_out = CLCudaAPI::Buffer<float_t>(ctx, queue,
                                                out_data[i].begin(),
                                                out_data[i].end());

      kernel.SetArgument(0, dev_in);    // image_data
      kernel.SetArgument(1, 0);         // image_offset
      kernel.SetArgument(2, dev_W);     // kernel_data
      kernel.SetArgument(3, 0);         // kernel_offset
      kernel.SetArgument(4, dev_bias);  // bias
      kernel.SetArgument(5, 0);         // bias_offset
      kernel.SetArgument(6, dev_out);   // convolved_image
      kernel.SetArgument(7, 0);         // convolved_image_offset

      kernel.SetArgument(8,  static_cast<cl_ushort>(params.in.width_));    // WIDTH
      kernel.SetArgument(9,  static_cast<cl_ushort>(params.in.height_));   // HEIGHT
      kernel.SetArgument(10, static_cast<cl_ushort>(params.out.width_));   // OUTPUT_W
      kernel.SetArgument(11, static_cast<cl_ushort>(params.out.height_));  // OUTPUT_H
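      // A sketch of the device-side contract implied by the arguments above.
      // The actual CFMulti signature lives in the OpenCL source registered
      // with the ProgramManager; this reconstruction is an assumption:
      //
      //   __kernel void CFMulti(__global const float* image_data,      int image_offset,
      //                         __global const float* kernel_data,     int kernel_offset,
      //                         __global const float* bias,            int bias_offset,
      //                         __global float*       convolved_image, int convolved_image_offset,
      //                         ushort WIDTH, ushort HEIGHT,
      //                         ushort OUTPUT_W, ushort OUTPUT_H);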
      // Make sure the global work size is a multiple of 16
      serial_size_t res  = device->device().MaxWorkGroupSize() % 16;
      serial_size_t size = device->device().MaxWorkGroupSize() - res;

      auto global = std::vector<size_t>{size};
      auto local  = std::vector<size_t>{16};
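      // OpenCL 1.x requires each global work size to be an exact multiple of
      // the corresponding local work-group size, hence the rounding above.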
      // Creates a new CLCudaAPI event to be able to time kernels
      auto event = CLCudaAPI::Event();

      // Enqueues the kernel and waits for the result.
      // Note that launching the kernel is always asynchronous and thus
      // requires finishing the queue in order to complete the operation.
      nn_info("## Running the kernel ...");

      kernel.Launch(queue, global, local, event.pointer());
      queue.Finish(event);

      nn_info(" > Took " + to_string(event.GetElapsedTime()) + " ms");

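      // GetElapsedTime() is only meaningful once the queue has finished
      // processing the event, which the Finish(event) call above guarantees.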
      // Copy data back GPU -> CPU
      std::vector<float_t> out(out_data[i].size(), 0);
      dev_out.Read(queue, out_data[i].size(), out);

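      // Read() performs a blocking device-to-host copy, so 'out' holds the
      // convolution result as soon as it returns.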
      // FOR DEBUG ONLY
      nn_warn("output kernel");
      for (serial_size_t j = 0; j < out.size(); ++j) {
        std::cout << out[j] << " ";
      }
      std::cout << std::endl;

      // copy the result back into the output tensor, overwriting it in place
      // (std::back_inserter would append past the zero-filled elements and
      // grow the tensor)
      std::copy(std::begin(out), std::end(out), std::begin(out_data[i]));
    }
#else
    throw nn_error("Not compiled with OpenCL");
#endif
  }
};

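// Backward counterpart of the forward op above; currently a stub that
// rejects any attempt to run the backward pass on the OpenCL backend.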
class Conv2dOpenCLBackwardOp : public core::OpKernel {
 public:
  explicit Conv2dOpenCLBackwardOp(const core::OpKernelConstruction& context)
    : core::OpKernel(context) {}

  void compute(const core::OpKernelContext& context) override {
    throw nn_error("Not implemented yet.");
  }
};

}  // namespace tiny_dnn