
github.com/moses-smt/nplm.git
Diffstat (limited to 'src/SoftmaxLoss.h')
-rw-r--r--  src/SoftmaxLoss.h  159
1 file changed, 79 insertions, 80 deletions
diff --git a/src/SoftmaxLoss.h b/src/SoftmaxLoss.h
index bc55762..d89cde6 100644
--- a/src/SoftmaxLoss.h
+++ b/src/SoftmaxLoss.h
@@ -1,7 +1,6 @@
- #ifndef SOFTMAXLOSS_H
+#ifndef SOFTMAXLOSS_H
#define SOFTMAXLOSS_H

-//#include <../3rdparty/Eigen/Dense>
#include <Eigen/Dense>
#include "multinomial.h"
#include "util.h"
@@ -20,55 +19,55 @@ enum loss_function_type { LogLoss, NCELoss, InvalidLoss };
inline loss_function_type string_to_loss_function (const std::string &s)
{
- if (s == "log")
- return LogLoss;
- else if (s == "nce")
- return NCELoss;
- else
- return InvalidLoss;
+ if (s == "log")
+ return LogLoss;
+ else if (s == "nce")
+ return NCELoss;
+ else
+ return InvalidLoss;
}
inline std::string loss_function_to_string (loss_function_type f)
{
- if (f == LogLoss)
- return "log";
- else if (f == NCELoss)
- return "nce";
+ if (f == LogLoss)
+ return "log";
+ else if (f == NCELoss)
+ return "nce";
}
/// Note: Outputs log-probabilities.
struct SoftmaxLogLoss
{
- template <typename DerivedI, typename DerivedW, typename DerivedO>
- void fProp(const MatrixBase<DerivedI> &input, const MatrixBase<DerivedW> &output_words, const MatrixBase<DerivedO> &output_const, double &loss)
+ template <typename DerivedI, typename DerivedW, typename DerivedO>
+ void fProp(const MatrixBase<DerivedI> &input, const MatrixBase<DerivedW> &output_words, const MatrixBase<DerivedO> &output_const, double &loss)
+ {
+ UNCONST(DerivedO, output_const, output);
+
+ double log_likelihood = 0.0;
+
+#pragma omp parallel for reduction(+:log_likelihood)
+ for (int train_id = 0; train_id < input.cols(); train_id++)
{
- UNCONST(DerivedO, output_const, output);
-
- double log_likelihood = 0.0;
-
- #pragma omp parallel for reduction(+:log_likelihood)
- for (int train_id = 0; train_id < input.cols(); train_id++)
- {
- double normalization = logsum(input.col(train_id));
- output.col(train_id).array() = input.col(train_id).array() - normalization;
- log_likelihood += output(output_words(train_id), train_id);
- }
- loss = log_likelihood;
+ double normalization = logsum(input.col(train_id));
+ output.col(train_id).array() = input.col(train_id).array() - normalization;
+ log_likelihood += output(output_words(train_id), train_id);
}
-
- template <typename DerivedW, typename DerivedO, typename DerivedI>
- void bProp(const MatrixBase<DerivedW> &output_words, const MatrixBase<DerivedO> &output, const MatrixBase<DerivedI> &grad_input_const)
+ loss = log_likelihood;
+ }
+
+ template <typename DerivedW, typename DerivedO, typename DerivedI>
+ void bProp(const MatrixBase<DerivedW> &output_words, const MatrixBase<DerivedO> &output, const MatrixBase<DerivedI> &grad_input_const)
+ {
+ UNCONST(DerivedI, grad_input_const, grad_input);
+ grad_input.setZero();
+#pragma omp parallel for
+ for (int train_id = 0; train_id < output.cols(); train_id++)
{
- UNCONST(DerivedI, grad_input_const, grad_input);
- grad_input.setZero();
- #pragma omp parallel for
- for (int train_id = 0; train_id < output.cols(); train_id++)
- {
- grad_input(output_words(train_id), train_id) += 1.;
- grad_input.col(train_id) -= output.col(train_id).array().exp().matrix();
- }
+ grad_input(output_words(train_id), train_id) += 1.;
+ grad_input.col(train_id) -= output.col(train_id).array().exp().matrix();
}
+ }
};
///// Softmax layer plus NCE loss function.
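[Note on the hunk above] The SoftmaxLogLoss change is a whitespace-only reindentation, but the code it touches is worth spelling out: fProp computes a column-wise log-softmax (each column of output is the input column minus its log-sum-exp), accumulating the log-probability of each observed word into the loss, and bProp emits the gradient of that log-likelihood, i.e. the one-hot target minus the softmax probabilities. One caveat: loss_function_to_string, unchanged in substance here, returns nothing when f is InvalidLoss, so callers must pass a valid value. A minimal standalone sketch of the forward computation, assuming Eigen; log_sum_exp and log_softmax_loss are illustrative names, with log_sum_exp standing in for nplm's logsum() from util.h:

#include <Eigen/Dense>
#include <cmath>

// Stable log(sum(exp(v))): subtract the max before exponentiating.
double log_sum_exp(const Eigen::VectorXd &v)
{
    double m = v.maxCoeff();
    return m + std::log((v.array() - m).exp().sum());
}

// After the call, output.col(j) holds log P(word | column j);
// the return value is the summed log-likelihood of the observed words,
// mirroring what SoftmaxLogLoss::fProp stores into loss.
double log_softmax_loss(const Eigen::MatrixXd &input,
                        const Eigen::VectorXi &output_words,
                        Eigen::MatrixXd &output)
{
    double log_likelihood = 0.0;
    for (int j = 0; j < input.cols(); j++)
    {
        double z = log_sum_exp(input.col(j));         // normalization term
        output.col(j).array() = input.col(j).array() - z; // log-probabilities
        log_likelihood += output(output_words(j), j); // log P(observed word)
    }
    return log_likelihood;
}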
@@ -81,55 +80,55 @@ struct SoftmaxLogLoss
template <typename Multinomial>
class SoftmaxNCELoss
{
- const Multinomial &unigram;
+ const Multinomial &unigram;
-public:
- SoftmaxNCELoss(const Multinomial &unigram)
+ public:
+ SoftmaxNCELoss(const Multinomial &unigram)
: unigram(unigram)
+ {
+ }
+
+ template <typename DerivedI, typename DerivedW, typename DerivedO>
+ void fProp(const MatrixBase<DerivedI> &scores,
+ const MatrixBase<DerivedW> &minibatch_samples,
+ const MatrixBase<DerivedO> &output_const, double &loss)
+ {
+ UNCONST(DerivedO, output_const, output);
+ double log_likelihood = 0.0;
+ int num_noise_samples = minibatch_samples.rows()-1;
+ double log_num_noise_samples = std::log(num_noise_samples);
+#pragma omp parallel for reduction(+:log_likelihood) schedule(static)
+ for (int train_id = 0; train_id < scores.cols(); train_id++)
{
+ for (int sample_id = 0;sample_id < minibatch_samples.rows(); sample_id++)
+ {
+ int sample = minibatch_samples(sample_id, train_id);
+ // To avoid zero or infinite probabilities,
+ // never take exp of score without normalizing first,
+ // even if it's a little slower...
+ double score = scores(sample_id, train_id);
+ double score_noise = log_num_noise_samples + unigram.logprob(sample);
+ double z = logadd(score, score_noise);
+ double logprob = score - z;
+ double logprob_noise = score_noise - z;
+ output(sample_id, train_id) = std::exp(logprob);
+ log_likelihood += sample_id == 0 ? logprob : logprob_noise;
+ }
}
-
- template <typename DerivedI, typename DerivedW, typename DerivedO>
- void fProp(const MatrixBase<DerivedI> &scores,
- const MatrixBase<DerivedW> &minibatch_samples,
- const MatrixBase<DerivedO> &output_const, double &loss)
- {
- UNCONST(DerivedO, output_const, output);
- double log_likelihood = 0.0;
- int num_noise_samples = minibatch_samples.rows()-1;
- double log_num_noise_samples = std::log(num_noise_samples);
- #pragma omp parallel for reduction(+:log_likelihood) schedule(static)
- for (int train_id = 0; train_id < scores.cols(); train_id++)
- {
- for (int sample_id = 0;sample_id < minibatch_samples.rows(); sample_id++)
- {
- int sample = minibatch_samples(sample_id, train_id);
- // To avoid zero or infinite probabilities,
- // never take exp of score without normalizing first,
- // even if it's a little slower...
- double score = scores(sample_id, train_id);
- double score_noise = log_num_noise_samples + unigram.logprob(sample);
- double z = logadd(score, score_noise);
- double logprob = score - z;
- double logprob_noise = score_noise - z;
- output(sample_id, train_id) = std::exp(logprob);
- log_likelihood += sample_id == 0 ? logprob : logprob_noise;
- }
- }
- loss = log_likelihood;
- }
-
- template <typename DerivedO, typename DerivedI>
- void bProp(const MatrixBase<DerivedO> &probs, const MatrixBase<DerivedI> &output_const)
+ loss = log_likelihood;
+ }
+
+ template <typename DerivedO, typename DerivedI>
+ void bProp(const MatrixBase<DerivedO> &probs, const MatrixBase<DerivedI> &output_const)
+ {
+ UNCONST(DerivedI, output_const, output);
+#pragma omp parallel for schedule(static)
+ for (int train_id = 0; train_id < probs.cols(); train_id++)
{
- UNCONST(DerivedI, output_const, output);
- #pragma omp parallel for schedule(static)
- for (int train_id = 0; train_id < probs.cols(); train_id++)
- {
- output.col(train_id) = -probs.col(train_id);
- output(0, train_id) += 1.0;
- }
+ output.col(train_id) = -probs.col(train_id);
+ output(0, train_id) += 1.0;
}
+ }
};
} // namespace nplm
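[Note on the NCE hunks] SoftmaxNCELoss implements standard noise-contrastive estimation: in each training column, row 0 holds the data word and the remaining num_noise_samples rows hold words drawn from the unigram noise distribution. The posterior that a word came from the model rather than the noise is computed entirely in log space via logadd, exactly as the in-code comment advises, and bProp's update (-probs plus 1.0 on row 0) is the gradient of that objective: the data indicator minus the model posterior. A hedged sketch of the per-sample quantity; logadd below is an assumed stable log(exp(a) + exp(b)) mirroring the helper the file includes from util.h, and nce_log_posterior is an illustrative name:

#include <algorithm>
#include <cmath>

// Numerically stable log(exp(a) + exp(b)).
double logadd(double a, double b)
{
    double m = std::max(a, b);
    return m + std::log1p(std::exp(std::min(a, b) - m));
}

// Log-posterior that a sampled word was generated by the model:
//   score       = unnormalized model log-score of the word
//   score_noise = log(num_noise_samples) + log P_unigram(word)
// Returns score - logadd(score, score_noise),
// which equals log sigmoid(score - score_noise).
double nce_log_posterior(double score, double score_noise)
{
    double z = logadd(score, score_noise); // log(exp(score) + exp(score_noise))
    return score - z;
}

In fProp above, the data sample (sample_id == 0) contributes this logprob to the likelihood, while each noise sample contributes the complementary logprob_noise = score_noise - z.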