28 #include "tiny_dnn/util/util.h"
// Squared-error accumulation between a prediction `y` and a target `t`.
//
// y: predicted values.
// t: target values; must have the same length as `y` (checked by assert only).
//
// NOTE(review): the declaration of `d` and the return statement are not
// visible in this listing, so any final scaling of the sum (e.g. averaging)
// cannot be confirmed from here.
35 static float_t f(
const vec_t& y,
const vec_t& t) {
36 assert(y.size() == t.size());
39 for(serial_size_t i = 0; i < y.size(); ++i)
// Adds (y[i] - t[i])^2 for every element.
40 d += (y[i] - t[i]) * (y[i] - t[i]);
// Element-wise derivative matching the squared-error accumulation:
// d[i] = (2 / t.size()) * (y[i] - t[i]).
//
// y: predicted values.
// t: target values; must have the same length as `y` (checked by assert only).
//
// NOTE(review): the declaration/sizing of `d` and the return statement are
// not visible in this listing.
45 static vec_t df(
const vec_t& y,
const vec_t& t) {
46 assert(y.size() == t.size());
// Loop-invariant scale 2/N, computed once. An empty `t` makes this a
// division by zero, but the loop below then runs zero times.
48 float_t factor = float_t(2) /
static_cast<float_t
>(t.size());
50 for(serial_size_t i = 0; i < y.size(); ++i)
51 d[i] = factor * (y[i] - t[i]);
// Absolute-error accumulation between a prediction `y` and a target `t`.
//
// y: predicted values.
// t: target values; must have the same length as `y` (checked by assert only).
//
// NOTE(review): the return statement is not visible in this listing, so any
// final scaling of the sum cannot be confirmed from here.
60 static float_t f(
const vec_t& y,
const vec_t& t) {
61 assert(y.size() == t.size());
// Running total, starts at zero.
62 float_t d = float_t(0);
64 for(serial_size_t i = 0; i < y.size(); ++i)
// Adds |y[i] - t[i]| for every element.
65 d += std::abs(y[i] - t[i]);
// Element-wise derivative matching the absolute-error accumulation: each
// output element is set to -1/N or +1/N, where N = t.size().
//
// y: predicted values.
// t: target values; must have the same length as `y` (checked by assert only).
//
// NOTE(review): the declaration of `d`, the conditions selecting between the
// two assignments, any handling of a zero difference, and the return
// statement are not visible in this listing.
70 static vec_t df(
const vec_t& y,
const vec_t& t) {
71 assert(y.size() == t.size());
// Loop-invariant magnitude 1/N of every gradient element.
73 float_t factor = float_t(1) /
static_cast<float_t
>(t.size());
75 for(serial_size_t i = 0; i < y.size(); ++i) {
// Signed difference; presumably its sign picks the branch below — confirm
// against the full source.
76 float_t sign = y[i] - t[i];
78 d[i] = -float_t(1) * factor;
80 d[i] = float_t(1) * factor;
// `fraction` is an integer template parameter; the code below derives a
// tolerance from it as eps = 1 / fraction.
// NOTE(review): the declaration this template header attaches to is not
// visible in this listing.
91 template<
int fraction>
// Absolute-error accumulation with a tolerance `eps`.
//
// y: predicted values.
// t: target values; must have the same length as `y` (checked by assert only).
//
// NOTE(review): how `diff` is compared against `eps`, what is added to `d`,
// and the return statement are not visible in this listing.
94 static float_t f(
const vec_t& y,
const vec_t& t) {
95 assert(y.size() == t.size());
// Running total, starts at zero.
96 float_t d = float_t(0);
// Tolerance derived from the template argument. Assumes float_t is a
// floating-point type so this is not integer division — TODO confirm.
// fraction == 0 would divide by zero.
97 const float_t eps = float_t(1) / fraction;
99 for(serial_size_t i = 0; i < y.size(); ++i) {
// Magnitude of the element-wise error.
100 float_t diff = std::abs(y[i] - t[i]);
// Element-wise derivative for the tolerance-based absolute error: the
// visible assignments set an output element to -1/N or +1/N, N = t.size().
//
// y: predicted values.
// t: target values; must have the same length as `y` (checked by assert only).
//
// NOTE(review): the declaration of `d`, the conditions selecting between the
// assignments (presumably comparing `sign` against +/-eps — confirm), the
// value used inside the tolerance band, and the return statement are not
// visible in this listing.
107 static vec_t df(
const vec_t& y,
const vec_t& t) {
108 assert(y.size() == t.size());
// Loop-invariant magnitude 1/N of a non-zero gradient element.
110 const float_t factor = float_t(1) /
static_cast<float_t
>(t.size());
// Tolerance derived from the template argument `fraction`; fraction == 0
// would divide by zero.
111 const float_t eps = float_t(1) / fraction;
113 for(serial_size_t i = 0; i < y.size(); ++i) {
// Signed difference between prediction and target.
114 float_t sign = y[i] - t[i];
116 d[i] = -float_t(1) * factor;
118 d[i] = float_t(1) * factor;
// Binary cross-entropy accumulation:
// sum over i of  -t[i]*log(y[i]) - (1 - t[i])*log(1 - y[i]).
//
// y: predicted values.
// t: target values; must have the same length as `y` (checked by assert only).
//
// NOTE(review): no clamping of `y` is visible here. y[i] == 0 or y[i] == 1
// feeds 0 to std::log (-inf), and values outside [0, 1] feed it a negative
// number (NaN). The return statement is not visible in this listing.
129 static float_t f(
const vec_t& y,
const vec_t& t) {
130 assert(y.size() == t.size());
// Running total, starts at zero.
131 float_t d = float_t(0);
133 for(serial_size_t i = 0; i < y.size(); ++i)
134 d += -t[i] * std::log(y[i]) - (float_t(1) - t[i]) * std::log(float_t(1) - y[i]);
// Element-wise derivative of the binary cross-entropy term:
// d[i] = (y[i] - t[i]) / (y[i] * (1 - y[i])).
//
// y: predicted values.
// t: target values; must have the same length as `y` (checked by assert only).
//
// NOTE(review): the denominator is zero when y[i] is exactly 0 or 1 and no
// guard is visible here. The declaration of `d` and the return statement are
// not visible in this listing.
139 static vec_t df(
const vec_t& y,
const vec_t& t) {
140 assert(y.size() == t.size());
143 for(serial_size_t i = 0; i < y.size(); ++i)
144 d[i] = (y[i] - t[i]) / (y[i] * (float_t(1) - y[i]));
// Multiclass cross-entropy accumulation: sum over i of -t[i]*log(y[i]).
//
// y: predicted values.
// t: target values; must have the same length as `y` (checked by assert only).
//
// NOTE(review): no guard on y[i] <= 0 is visible here (std::log gives -inf
// for 0 and NaN for negatives). The declaration of `d` and the return
// statement are not visible in this listing.
153 static float_t f(
const vec_t& y,
const vec_t& t) {
154 assert(y.size() == t.size());
157 for(serial_size_t i = 0; i < y.size(); ++i)
158 d += -t[i] * std::log(y[i]);
// Element-wise derivative of the multiclass cross-entropy term:
// d[i] = -t[i] / y[i].
//
// y: predicted values.
// t: target values; must have the same length as `y` (checked by assert only).
//
// NOTE(review): y[i] == 0 divides by zero and no guard is visible here. The
// declaration of `d` and the return statement are not visible in this
// listing.
163 static vec_t df(
const vec_t& y,
const vec_t& t) {
164 assert(y.size() == t.size());
167 for(serial_size_t i = 0; i < y.size(); ++i)
168 d[i] = - t[i] / y[i];
// Gradient for a single output vector, parameterised on a type `E`.
//
// y: predicted values.
// t: target values; must have the same length as `y` (checked by assert only).
//
// NOTE(review): only the size check is visible in this listing. Presumably
// the result comes from E's derivative function — confirm against the full
// source.
174 template <
typename E>
175 vec_t gradient(
const vec_t& y,
const vec_t& t) {
176 assert(y.size() == t.size());
// Gradient for a sequence of output vectors: calls gradient<E> on each
// (y[i], t[i]) pair and appends the result to `grads` in index order.
//
// y: predicted vectors.
// t: target vectors; must have the same count as `y` (checked by assert only).
//
// NOTE(review): the return statement is not visible in this listing.
180 template <
typename E>
181 std::vector<vec_t> gradient(
const std::vector<vec_t>& y,
const std::vector<vec_t>& t) {
// Starts empty and grows by one entry per input pair.
182 std::vector<vec_t> grads;
184 assert(y.size() == t.size());
186 for (serial_size_t i = 0; i < y.size(); i++)
187 grads.push_back(gradient<E>(y[i], t[i]));
// Scales a sample's gradient in place by a per-element cost.
//
// sample_gradient: gradient to modify, indexed [channel][element].
// sample_cost:     multipliers with the same indexing.
//
// The multiplication only happens where shapes agree: the outer (channel)
// counts must match, and within a channel the element counts must match.
// NOTE(review): no else-branch is visible in this listing, so mismatched
// levels appear to be left unmodified — confirm against the full source.
192 inline void apply_cost_if_defined(std::vector<vec_t>& sample_gradient,
193 const std::vector<vec_t>& sample_cost) {
194 if (sample_gradient.size() == sample_cost.size()) {
196 const serial_size_t channel_count =
static_cast<serial_size_t
>(sample_gradient.size());
197 for (
size_t channel = 0; channel < channel_count; ++channel) {
// Per-channel shape check before touching any element.
198 if (sample_gradient[channel].size() == sample_cost[channel].size()) {
199 const size_t element_count = sample_gradient[channel].size();
202 for (
size_t element = 0; element < element_count; ++element) {
203 sample_gradient[channel][element] *= sample_cost[channel][element];
// Gradient for a batch of samples, with optional per-element cost weighting.
//
// y:      predictions, one tensor_t per sample.
// t:      targets; must have the same sample count as `y` (assert only).
// t_cost: optional costs; asserted to be empty or one entry per sample.
//
// For every sample, gradients[sample] = gradient<E>(y[sample], t[sample]).
// If `t_cost` has an entry at that index, the gradient is then passed through
// apply_cost_if_defined.
//
// NOTE(review): `y[0]` is read before any emptiness check, so calling this
// with an empty `y` indexes out of bounds. The return statement is not
// visible in this listing.
211 template <
typename E>
212 std::vector<tensor_t> gradient(
const std::vector<tensor_t>& y,
213 const std::vector<tensor_t>& t,
214 const std::vector<tensor_t>& t_cost) {
216 const serial_size_t sample_count =
static_cast<serial_size_t
>(y.size());
// Channel count is taken from the first sample; the asserts in the loop
// require every sample to match it.
217 const serial_size_t channel_count =
static_cast<serial_size_t
>(y[0].size());
// One (initially empty) gradient slot per sample.
219 std::vector<tensor_t> gradients(sample_count);
// channel_count is otherwise used only inside asserts; presumably this macro
// silences the unused-variable warning when asserts are compiled out —
// confirm the macro's definition.
221 CNN_UNREFERENCED_PARAMETER(channel_count);
222 assert(y.size() == t.size());
223 assert(t_cost.empty() || t_cost.size() == t.size());
226 for (serial_size_t sample = 0; sample < sample_count; ++sample) {
227 assert(y[sample].size() == channel_count);
228 assert(t[sample].size() == channel_count);
229 assert(t_cost.empty() || t_cost[sample].empty() ||
230 t_cost[sample].size() == channel_count);
232 gradients[sample] = gradient<E>(y[sample], t[sample]);
// Bounds check doubles as the "cost provided?" test: an empty t_cost skips
// weighting for every sample.
234 if (sample < t_cost.size()) {
235 apply_cost_if_defined(gradients[sample], t_cost[sample]);
Definition: loss_function.h:92
Definition: loss_function.h:58
Definition: loss_function.h:151
Definition: loss_function.h:127
Definition: loss_function.h:33