diff options
author | Tomasz Dwojak <t.dwojak@amu.edu.pl> | 2016-10-21 20:25:44 +0300 |
---|---|---|
committer | Tomasz Dwojak <t.dwojak@amu.edu.pl> | 2016-11-01 21:07:04 +0300 |
commit | 5059b12aba0fcf5b520559891f4239604b7807a8 (patch) | |
tree | c25db197bb7cfb4aa6131c9a2171f68cce1b9aac | |
parent | e877b5760f1d9f04c6e8fb7e7b6b1a09fd9849c3 (diff) |
Move BestHyps outside mblas::Matrix
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/common/base_best_hyps.h | 11 | ||||
-rw-r--r-- | src/common/base_matrix.h | 7 | ||||
-rw-r--r-- | src/common/god.cpp | 9 | ||||
-rw-r--r-- | src/common/god.h | 3 | ||||
-rw-r--r-- | src/common/loader.h | 6 | ||||
-rw-r--r-- | src/common/search.cpp | 10 | ||||
-rw-r--r-- | src/common/search.h | 7 | ||||
-rw-r--r-- | src/cpu/decoder/best_hyps.h | 139 | ||||
-rw-r--r-- | src/cpu/decoder/encoder_decoder.cpp | 12 | ||||
-rw-r--r-- | src/cpu/decoder/encoder_decoder.h | 14 | ||||
-rw-r--r-- | src/cpu/decoder/encoder_decoder_loader.h | 2 | ||||
-rw-r--r-- | src/cpu/mblas/matrix.cpp | 129 | ||||
-rw-r--r-- | src/cpu/mblas/matrix.h | 12 | ||||
-rw-r--r-- | src/gpu/decoder/ape_penalty.cu | 5 | ||||
-rw-r--r-- | src/gpu/decoder/ape_penalty.h | 5 | ||||
-rw-r--r-- | src/gpu/decoder/best_hyps.h | 168 | ||||
-rw-r--r-- | src/gpu/decoder/encoder_decoder.cu | 5 | ||||
-rw-r--r-- | src/gpu/decoder/encoder_decoder.h | 2 |
19 files changed, 377 insertions, 171 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index dd3a7a14..52af5b36 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) project(amunn CXX) set(CMAKE_CXX_FLAGS_RELEASE "-std=c++14 -fPIC -O3 -Ofast -m64 -flto -march=native -funroll-loops -ffinite-math-only -Wno-unused-result -Wno-deprecated -pthread") -set(CMAKE_CXX_FLAGS_DEBUG "-std=c++14 -fPIC -g -m64 -flto -march=native -funroll-loops -ffinite-math-only -Wno-unused-result -Wno-deprecated -pthread") +set(CMAKE_CXX_FLAGS_DEBUG "-std=c++14 -fPIC -g -O0 -m64 -flto -march=native -funroll-loops -ffinite-math-only -Wno-unused-result -Wno-deprecated -pthread") set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE} -g -pg") set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS_RELEASE}) diff --git a/src/common/base_best_hyps.h b/src/common/base_best_hyps.h new file mode 100644 index 00000000..995b38c3 --- /dev/null +++ b/src/common/base_best_hyps.h @@ -0,0 +1,11 @@ +#pragma once + +#include <functional> +#include <vector> + +#include "common/types.h" +#include "scorer.h" + + +using BestHypsType = std::function<void(Beam&, const Beam&, const size_t, + const std::vector<ScorerPtr>&, const Words&, bool)>; diff --git a/src/common/base_matrix.h b/src/common/base_matrix.h index f12dacba..241b4440 100644 --- a/src/common/base_matrix.h +++ b/src/common/base_matrix.h @@ -20,12 +20,5 @@ class BaseMatrix { virtual size_t Cols() const = 0; virtual void Resize(size_t rows, size_t cols) = 0; - virtual void BestHyps(Beam& bestHyps, - const Beam& prevHyps, - const size_t beamSize, - const std::vector<ScorerPtr> &scorers, - const Words &filterIndices, - bool returnAlignment=false) const = 0; - virtual std::string Debug() const = 0; }; diff --git a/src/common/god.cpp b/src/common/god.cpp index de41bbaa..1d0923dd 100644 --- a/src/common/god.cpp +++ b/src/common/god.cpp @@ -186,6 +186,15 @@ std::vector<ScorerPtr> God::GetScorers(size_t threadId) { return scorers; } +BestHypsType God::GetBestHyps(size_t threadId) { + size_t cpuThreads = God::Get<size_t>("cpu-threads"); + if (threadId < cpuThreads) { + return Summon().cpuLoaders_.begin()->second->GetBestHyps(); + } else { + return Summon().gpuLoaders_.begin()->second->GetBestHyps(); + } +} + std::vector<std::string> God::GetScorerNames() { std::vector<std::string> scorerNames; for(auto&& name : Summon().cpuLoaders_ | boost::adaptors::map_keys) diff --git a/src/common/god.h b/src/common/god.h index a974c86d..1a686f6a 100644 --- a/src/common/god.h +++ b/src/common/god.h @@ -8,6 +8,7 @@ #include "common/scorer.h" #include "common/types.h" #include "common/processor/processor.h" +#include "common/base_best_hyps.h" class Weights; class Vocab; @@ -45,6 +46,8 @@ class God { static Filter& GetFilter(); + static BestHypsType GetBestHyps(size_t threadId); + static std::vector<ScorerPtr> GetScorers(size_t); static std::vector<std::string> GetScorerNames(); static std::map<std::string, float>& GetScorerWeights(); diff --git a/src/common/loader.h b/src/common/loader.h index 970f190a..b3326d01 100644 --- a/src/common/loader.h +++ b/src/common/loader.h @@ -4,6 +4,7 @@ #include <yaml-cpp/yaml.h> #include "scorer.h" +#include "common/base_best_hyps.h" class Loader { public: @@ -19,18 +20,19 @@ class Loader { bool Has(const std::string& key) { return config_[key]; } - + template <typename T> T Get(const std::string& key) { return config_[key].as<T>(); } virtual ScorerPtr NewScorer(size_t) = 0; + virtual BestHypsType GetBestHyps() = 0; const std::string& GetName() { return name_; } - + protected: const std::string name_; const YAML::Node config_; diff --git a/src/common/search.cpp b/src/common/search.cpp index 635798dc..5c8f4780 100644 --- a/src/common/search.cpp +++ b/src/common/search.cpp @@ -3,15 +3,19 @@ #include <boost/timer/timer.hpp> #include "common/god.h" +#include "common/history.h" #include "common/filter.h" #include "common/base_matrix.h" using namespace std; Search::Search(size_t threadId) - : scorers_(God::GetScorers(threadId)) {} + : scorers_(God::GetScorers(threadId)), + BestHyps_(God::GetBestHyps(threadId)) { +} + -size_t Search::MakeFilter(const Words& srcWords, const size_t vocabSize) { +size_t Search::MakeFilter(const Words& srcWords, size_t vocabSize) { filterIndices_ = God::GetFilter().GetFilteredVocab(srcWords, vocabSize); for (size_t i = 0; i < scorers_.size(); i++) { scorers_[i]->Filter(filterIndices_); @@ -68,7 +72,7 @@ History Search::Decode(const Sentence& sentence) { bool returnAlignment = God::Get<bool>("return-alignment"); - scorers_[0]->GetProbs().BestHyps(hyps, prevHyps, beamSize, scorers_, filterIndices_, + BestHyps_(hyps, prevHyps, beamSize, scorers_, filterIndices_, returnAlignment); history.Add(hyps, history.size() == maxLength); diff --git a/src/common/search.h b/src/common/search.h index 1a301d23..e0f3f815 100644 --- a/src/common/search.h +++ b/src/common/search.h @@ -4,7 +4,9 @@ #include "common/scorer.h" #include "common/sentence.h" -#include "common/history.h" +#include "common/base_best_hyps.h" + +class History; class Search { public: @@ -12,7 +14,8 @@ class Search { History Decode(const Sentence& sentence); private: - size_t MakeFilter(const Words& srcWords, const size_t vocabSize); + size_t MakeFilter(const Words& srcWords, size_t vocabSize); std::vector<ScorerPtr> scorers_; Words filterIndices_; + BestHypsType BestHyps_; }; diff --git a/src/cpu/decoder/best_hyps.h b/src/cpu/decoder/best_hyps.h new file mode 100644 index 00000000..791ece68 --- /dev/null +++ b/src/cpu/decoder/best_hyps.h @@ -0,0 +1,139 @@ +#pragma once + +#include <vector> +#include <boost/iterator/permutation_iterator.hpp> + +#include "common/scorer.h" +#include "common/god.h" +#include "common/exception.h" +#include "cpu/mblas/matrix.h" + +namespace CPU { + +struct ProbCompare { + ProbCompare(const float* data) : data_(data) {} + + bool operator()(const unsigned a, const unsigned b) { + return data_[a] > data_[b]; + } + + const float* data_; +}; + +void BestHyps(Beam& bestHyps, + const Beam& prevHyps, + const size_t beamSize, + const std::vector<ScorerPtr> &scorers, + const Words &filterIndices, + bool returnAlignment) +{ + using namespace mblas; + + auto& weights = God::GetScorerWeights(); + + mblas::ArrayMatrix& Probs = static_cast<mblas::ArrayMatrix&>(scorers[0]->GetProbs()); + + mblas::ArrayMatrix Costs(Probs.rows(), 1); + for (size_t i = 0; i < prevHyps.size(); ++i) { + Costs.data()[i] = prevHyps[i]->GetCost(); + } + + Probs *= weights[scorers[0]->GetName()]; + AddBiasVector<byColumn>(Probs, Costs); + + for (size_t i = 1; i < scorers.size(); ++i) { + mblas::ArrayMatrix &currProb = static_cast<mblas::ArrayMatrix&>(scorers[i]->GetProbs()); + + Probs += weights[scorers[i]->GetName()] * currProb; + } + + size_t size = Probs.rows() * Probs.columns(); // Probs.size(); + std::vector<size_t> keys(size); + for (size_t i = 0; i < keys.size(); ++i) { + keys[i] = i; + } + + std::vector<size_t> bestKeys(beamSize); + std::vector<float> bestCosts(beamSize); + + if (!God::Get<bool>("allow-unk")) { + blaze::column(Probs, UNK) = std::numeric_limits<float>::lowest(); + } + + std::nth_element(keys.begin(), keys.begin() + beamSize, keys.end(), + ProbCompare(Probs.data())); + + for (size_t i = 0; i < beamSize; ++i) { + bestKeys[i] = keys[i]; + bestCosts[i] = Probs.data()[keys[i]]; + } + + std::vector<std::vector<float>> breakDowns; + bool doBreakdown = God::Get<bool>("n-best"); + if (doBreakdown) { + breakDowns.push_back(bestCosts); + for (auto& scorer : scorers) { + std::vector<float> modelCosts(beamSize); + mblas::ArrayMatrix &currProb = static_cast<mblas::ArrayMatrix&>(scorer->GetProbs()); + + auto it = boost::make_permutation_iterator(currProb.begin(), keys.begin()); + std::copy(it, it + beamSize, modelCosts.begin()); + breakDowns.push_back(modelCosts); + } + } + + bool filter = God::Get<std::vector<std::string>>("softmax-filter").size(); + + for (size_t i = 0; i < beamSize; i++) { + size_t wordIndex = bestKeys[i] % Probs.columns(); + + if (filter) { + wordIndex = filterIndices[wordIndex]; + } + + size_t hypIndex = bestKeys[i] / Probs.columns(); + float cost = bestCosts[i]; + + HypothesisPtr hyp; + if (returnAlignment) { + std::vector<SoftAlignmentPtr> alignments; + for (auto& scorer : scorers) { + if (CPU::EncoderDecoder* encdec = dynamic_cast<CPU::EncoderDecoder*>(scorer.get())) { + auto& attention = encdec->GetAttention(); + alignments.emplace_back(new SoftAlignment(attention.begin(hypIndex), + attention.end(hypIndex))); + } else { + UTIL_THROW2("Return Alignment is allowed only with Nematus scorer."); + } + } + + hyp.reset(new Hypothesis(prevHyps[hypIndex], wordIndex, hypIndex, cost, alignments)); + } else { + hyp.reset(new Hypothesis(prevHyps[hypIndex], wordIndex, hypIndex, cost)); + } + + if (doBreakdown) { + hyp->GetCostBreakdown().resize(scorers.size()); + float sum = 0; + for(size_t j = 0; j < scorers.size(); ++j) { + if (j == 0) { + hyp->GetCostBreakdown()[0] = breakDowns[0][i]; + } else { + float cost = 0; + if (j < scorers.size()) { + if (prevHyps[hypIndex]->GetCostBreakdown().size() < scorers.size()) + const_cast<HypothesisPtr&>(prevHyps[hypIndex])->GetCostBreakdown().resize(scorers.size(), 0.0); + cost = breakDowns[j][i] + const_cast<HypothesisPtr&>(prevHyps[hypIndex])->GetCostBreakdown()[j]; + } + sum += weights[scorers[j]->GetName()] * cost; + hyp->GetCostBreakdown()[j] = cost; + } + } + hyp->GetCostBreakdown()[0] -= sum; + hyp->GetCostBreakdown()[0] /= weights[scorers[0]->GetName()]; + } + bestHyps.push_back(hyp); + } + +} +} diff --git a/src/cpu/decoder/encoder_decoder.cpp b/src/cpu/decoder/encoder_decoder.cpp index e1cf4a95..f43d1860 100644 --- a/src/cpu/decoder/encoder_decoder.cpp +++ b/src/cpu/decoder/encoder_decoder.cpp @@ -1,18 +1,20 @@ -#include "encoder_decoder.h" +#include "cpu/decoder/encoder_decoder.h" +#include "cpu/decoder/encoder_decoder_loader.h" #include <vector> #include <yaml-cpp/yaml.h> #include "common/threadpool.h" -#include "../dl4mt/dl4mt.h" +#include "cpu/dl4mt/dl4mt.h" #include "common/god.h" #include "common/loader.h" #include "common/scorer.h" #include "common/sentence.h" -#include "../mblas/matrix.h" +#include "cpu/mblas/matrix.h" +#include "cpu/decoder/best_hyps.h" using namespace std; @@ -145,5 +147,9 @@ ScorerPtr EncoderDecoderLoader::NewScorer(const size_t) { tab, *weights_[0])); } +BestHypsType EncoderDecoderLoader::GetBestHyps() { + return CPU::BestHyps; +} + } diff --git a/src/cpu/decoder/encoder_decoder.h b/src/cpu/decoder/encoder_decoder.h index 153f8e2d..840c57d7 100644 --- a/src/cpu/decoder/encoder_decoder.h +++ b/src/cpu/decoder/encoder_decoder.h @@ -84,18 +84,4 @@ class EncoderDecoder : public Scorer { mblas::Matrix SourceContext_; }; -//////////////////////////////////////////////// -class EncoderDecoderLoader : public Loader { - public: - EncoderDecoderLoader(const std::string name, - const YAML::Node& config); - - virtual void Load(); - - virtual ScorerPtr NewScorer(const size_t taskId); - - private: - std::vector<std::unique_ptr<Weights>> weights_; -}; - } diff --git a/src/cpu/decoder/encoder_decoder_loader.h b/src/cpu/decoder/encoder_decoder_loader.h index 7346b58e..0c98739c 100644 --- a/src/cpu/decoder/encoder_decoder_loader.h +++ b/src/cpu/decoder/encoder_decoder_loader.h @@ -7,6 +7,7 @@ #include "common/scorer.h" #include "common/loader.h" #include "common/logging.h" +#include "common/base_best_hyps.h" namespace CPU { @@ -20,6 +21,7 @@ class EncoderDecoderLoader : public Loader { virtual void Load(); virtual ScorerPtr NewScorer(const size_t taskId); + BestHypsType GetBestHyps(); private: std::vector<std::unique_ptr<Weights>> weights_; diff --git a/src/cpu/mblas/matrix.cpp b/src/cpu/mblas/matrix.cpp index 64b5d330..22eae5ad 100644 --- a/src/cpu/mblas/matrix.cpp +++ b/src/cpu/mblas/matrix.cpp @@ -17,135 +17,6 @@ namespace CPU { namespace mblas { -///////////////////////////////////////////////////////////////////// -struct ProbCompare { - ProbCompare(const float* data) : data_(data) {} - - bool operator()(const unsigned a, const unsigned b) { - return data_[a] > data_[b]; - } - - const float* data_; -}; - -///////////////////////////////////////////////////////////////////// -void ArrayMatrix::BestHyps(Beam& bestHyps, - const Beam& prevHyps, - const size_t beamSize, - const std::vector<ScorerPtr> &scorers, - const Words &filterIndices, - bool returnAlignment) const -{ - using namespace mblas; - - auto& weights = God::GetScorerWeights(); - - mblas::ArrayMatrix& Probs = static_cast<mblas::ArrayMatrix&>(scorers[0]->GetProbs()); - - mblas::ArrayMatrix Costs(Probs.rows(), 1); - for (size_t i = 0; i < prevHyps.size(); ++i) { - Costs.data()[i] = prevHyps[i]->GetCost(); - } - - Probs *= weights[scorers[0]->GetName()]; - AddBiasVector<byColumn>(Probs, Costs); - - for (size_t i = 1; i < scorers.size(); ++i) { - mblas::ArrayMatrix &currProb = static_cast<mblas::ArrayMatrix&>(scorers[i]->GetProbs()); - - Probs += weights[scorers[i]->GetName()] * currProb; - } - - size_t size = Probs.rows() * Probs.columns(); // Probs.size(); - std::vector<size_t> keys(size); - for (size_t i = 0; i < keys.size(); ++i) { - keys[i] = i; - } - - std::vector<size_t> bestKeys(beamSize); - std::vector<float> bestCosts(beamSize); - - if (!God::Get<bool>("allow-unk")) { - blaze::column(Probs, UNK) = std::numeric_limits<float>::lowest(); - } - - std::nth_element(keys.begin(), keys.begin() + beamSize, keys.end(), - ProbCompare(Probs.data())); - - for (size_t i = 0; i < beamSize; ++i) { - bestKeys[i] = keys[i]; - bestCosts[i] = Probs.data()[keys[i]]; - } - - std::vector<HostVector<float>> breakDowns; - bool doBreakdown = God::Get<bool>("n-best"); - if (doBreakdown) { - breakDowns.push_back(bestCosts); - for (auto& scorer : scorers) { - HostVector<float> modelCosts(beamSize); - mblas::ArrayMatrix &currProb = static_cast<mblas::ArrayMatrix&>(scorer->GetProbs()); - - auto it = boost::make_permutation_iterator(currProb.begin(), keys.begin()); - std::copy(it, it + beamSize, modelCosts.begin()); - breakDowns.push_back(modelCosts); - } - } - - bool filter = God::Get<std::vector<std::string>>("softmax-filter").size(); - - for (size_t i = 0; i < beamSize; i++) { - size_t wordIndex = bestKeys[i] % Probs.columns(); - - if (filter) { - wordIndex = filterIndices[wordIndex]; - } - - size_t hypIndex = bestKeys[i] / Probs.columns(); - float cost = bestCosts[i]; - - HypothesisPtr hyp; - if (returnAlignment) { - std::vector<SoftAlignmentPtr> alignments; - for (auto& scorer : scorers) { - if (CPU::EncoderDecoder* encdec = dynamic_cast<CPU::EncoderDecoder*>(scorer.get())) { - auto& attention = encdec->GetAttention(); - alignments.emplace_back(new SoftAlignment(attention.begin(hypIndex), - attention.end(hypIndex))); - } else { - UTIL_THROW2("Return Alignment is allowed only with Nematus scorer."); - } - } - - hyp.reset(new Hypothesis(prevHyps[hypIndex], wordIndex, hypIndex, cost, alignments)); - } else { - hyp.reset(new Hypothesis(prevHyps[hypIndex], wordIndex, hypIndex, cost)); - } - - if (doBreakdown) { - hyp->GetCostBreakdown().resize(scorers.size()); - float sum = 0; - for(size_t j = 0; j < scorers.size(); ++j) { - if (j == 0) { - hyp->GetCostBreakdown()[0] = breakDowns[0][i]; - } else { - float cost = 0; - if (j < scorers.size()) { - if (prevHyps[hypIndex]->GetCostBreakdown().size() < scorers.size()) - const_cast<HypothesisPtr&>(prevHyps[hypIndex])->GetCostBreakdown().resize(scorers.size(), 0.0); - cost = breakDowns[j][i] + const_cast<HypothesisPtr&>(prevHyps[hypIndex])->GetCostBreakdown()[j]; - } - sum += weights[scorers[j]->GetName()] * cost; - hyp->GetCostBreakdown()[j] = cost; - } - } - hyp->GetCostBreakdown()[0] -= sum; - hyp->GetCostBreakdown()[0] /= weights[scorers[0]->GetName()]; - } - bestHyps.push_back(hyp); - } -} - - } } diff --git a/src/cpu/mblas/matrix.h b/src/cpu/mblas/matrix.h index 1eb74323..b42db9bc 100644 --- a/src/cpu/mblas/matrix.h +++ b/src/cpu/mblas/matrix.h @@ -129,17 +129,11 @@ class ArrayMatrix : public BlazeMatrix<float, blaze::rowMajor> : Parent(rows, columns, val) {} - template <class MT> - ArrayMatrix(const MT& rhs) - :Parent(rhs) + template <class MT> + ArrayMatrix(const MT& rhs) + : Parent(rhs) {} - virtual void BestHyps(Beam& bestHyps, const Beam& prevHyps, - const size_t beamSize, - const std::vector<ScorerPtr> &scorers, - const Words &filterIndices, - bool returnAlignment) const; - }; //////////////////////////////////////////////////////////////////////// diff --git a/src/gpu/decoder/ape_penalty.cu b/src/gpu/decoder/ape_penalty.cu index 639a8e63..3cdd052d 100644 --- a/src/gpu/decoder/ape_penalty.cu +++ b/src/gpu/decoder/ape_penalty.cu @@ -2,6 +2,7 @@ #include "common/god.h" #include "common/vocab.h" #include "gpu/types-gpu.h" +#include "gpu/decoder/best_hyps.h" namespace GPU { @@ -90,5 +91,9 @@ ScorerPtr ApePenaltyLoader::NewScorer(size_t taskId) { srcTrgMap_, penalties_)); } +BestHypsType ApePenaltyLoader::GetBestHyps() { + return GPU::BestHyps(); +} + } diff --git a/src/gpu/decoder/ape_penalty.h b/src/gpu/decoder/ape_penalty.h index 662da558..992d987d 100644 --- a/src/gpu/decoder/ape_penalty.h +++ b/src/gpu/decoder/ape_penalty.h @@ -5,9 +5,11 @@ #include "common/types.h" #include "common/file_stream.h" #include "common/scorer.h" -#include "gpu/mblas/matrix.h" +#include "common/base_best_hyps.h" #include "common/loader.h" +#include "gpu/mblas/matrix.h" + namespace GPU { typedef std::vector<Word> SrcTrgMap; @@ -67,6 +69,7 @@ class ApePenaltyLoader : public Loader { virtual void Load(); virtual ScorerPtr NewScorer(size_t taskId); + virtual BestHypsType GetBestHyps(); private: SrcTrgMap srcTrgMap_; diff --git a/src/gpu/decoder/best_hyps.h b/src/gpu/decoder/best_hyps.h new file mode 100644 index 00000000..d7529f0d --- /dev/null +++ b/src/gpu/decoder/best_hyps.h @@ -0,0 +1,168 @@ +#pragma once + +#include "common/scorer.h" +#include "gpu/mblas/matrix.h" + +#include <thrust/device_vector.h> +#include <thrust/functional.h> +#include <thrust/execution_policy.h> + +namespace GPU { + +struct ProbCompare { + ProbCompare(const float* data) : data_(data) {} + + __host__ __device__ + bool operator()(const unsigned a, const unsigned b) { + return data_[a] > data_[b]; + } + + const float* data_; +}; + +class BestHyps { + public: + void operator()(Beam& bestHyps, + const Beam& prevHyps, + const size_t beamSize, + const std::vector<ScorerPtr>& scorers, + const Words& filterIndices, + bool returnAlignment) { + using namespace mblas; + + auto& weights = God::GetScorerWeights(); + + mblas::Matrix& Probs = static_cast<mblas::Matrix&>(scorers[0]->GetProbs()); + + Costs.reserve(Probs.Rows()); + HostVector<float> vCosts; + for (auto& h : prevHyps) { + vCosts.push_back(h->GetCost()); + } + thrust::copy(thrust::cuda::par.on(Matrix::GetStream()), + vCosts.begin(), vCosts.end(), Costs.begin()); + + BroadcastVecColumn(weights[scorers[0]->GetName()] * _1 + _2, + Probs, Costs); + for (size_t i = 1; i < scorers.size(); ++i) { + mblas::Matrix &currProbs = static_cast<mblas::Matrix&>(scorers[i]->GetProbs()); + + Element(_1 + weights[scorers[i]->GetName()] * _2, + Probs, currProbs); + } + + keys.resize(Probs.size()); + thrust::host_vector<unsigned> bestKeys(beamSize); + thrust::host_vector<float> bestCosts(beamSize); + + // @TODO: make this more efficient + if (!God::Get<bool>("allow-unk")) { + for (size_t i = 0; i < Probs.Rows(); i++) + Probs.Set(i, UNK, std::numeric_limits<float>::lowest()); + } + + // @TODO: Here we need to have a partial sort + if (beamSize < 10) { + for (size_t i = 0; i < beamSize; ++i) { + DeviceVector<float>::iterator iter = + thrust::max_element(thrust::cuda::par.on(Matrix::GetStream()), + Probs.begin(), Probs.end()); + bestKeys[i] = iter - Probs.begin(); + bestCosts[i] = *iter; + *iter = std::numeric_limits<float>::lowest(); + } + algo::copy(thrust::cuda::par.on(Matrix::GetStream()), + bestKeys.begin(), bestKeys.end(), keys.begin()); + } + else { + // these two function do not have equivalents in + // in the standard library or boost, keeping thrust + // namespace for now + thrust::sequence(thrust::cuda::par.on(Matrix::GetStream()), + keys.begin(), keys.end()); + thrust::sort_by_key(thrust::cuda::par.on(Matrix::GetStream()), Probs.begin(), Probs.end(), + keys.begin(), algo::greater<float>()); + + thrust::copy_n(thrust::cuda::par.on(Matrix::GetStream()), + keys.begin(), beamSize, bestKeys.begin()); + thrust::copy_n(thrust::cuda::par.on(Matrix::GetStream()), + Probs.begin(), beamSize, bestCosts.begin()); + } + + + std::vector<HostVector<float>> breakDowns; + bool doBreakdown = God::Get<bool>("n-best"); + if (doBreakdown) { + breakDowns.push_back(bestCosts); + for (size_t i = 1; i < scorers.size(); ++i) { + HostVector<float> modelCosts(beamSize); + mblas::Matrix &currProbs = static_cast<mblas::Matrix&>(scorers[i]->GetProbs()); + + auto it = iteralgo::make_permutation_iterator(currProbs.begin(), keys.begin()); + algo::copy(thrust::cuda::par.on(Matrix::GetStream()), + it, it + beamSize, modelCosts.begin()); + breakDowns.push_back(modelCosts); + } + } + + bool filter = God::Get<std::vector<std::string>>("softmax-filter").size(); + + for (size_t i = 0; i < beamSize; i++) { + size_t wordIndex = bestKeys[i] % Probs.Cols(); + if (filter) { + wordIndex = filterIndices[wordIndex]; + } + + size_t hypIndex = bestKeys[i] / Probs.Cols(); + float cost = bestCosts[i]; + + HypothesisPtr hyp; + if (returnAlignment) { + std::vector<SoftAlignmentPtr> alignments; + for (auto& scorer : scorers) { + if (GPU::EncoderDecoder* encdec = dynamic_cast<GPU::EncoderDecoder*>(scorer.get())) { + auto& attention = encdec->GetAttention(); + size_t attLength = attention.Cols(); + + alignments.emplace_back(new SoftAlignment(attention.begin() + hypIndex * attLength, + attention.begin() + (hypIndex + 1) * attLength)); + } else { + UTIL_THROW2("Return Alignment is allowed only with Nematus scorer."); + } + } + hyp.reset(new Hypothesis(prevHyps[hypIndex], wordIndex, hypIndex, cost, alignments)); + } else { + hyp.reset(new Hypothesis(prevHyps[hypIndex], wordIndex, hypIndex, cost)); + } + + if(doBreakdown) { + hyp->GetCostBreakdown().resize(scorers.size()); + float sum = 0; + for (size_t j = 0; j < scorers.size(); ++j) { + if (j == 0) + hyp->GetCostBreakdown()[0] = breakDowns[0][i]; + else { + float cost = 0; + if (j < scorers.size()) { + if(prevHyps[hypIndex]->GetCostBreakdown().size() < scorers.size()) + const_cast<HypothesisPtr&>(prevHyps[hypIndex])->GetCostBreakdown().resize(scorers.size(), 0.0); + cost = breakDowns[j][i] + const_cast<HypothesisPtr&>(prevHyps[hypIndex])->GetCostBreakdown()[j]; + } + sum += weights[scorers[j]->GetName()] * cost; + hyp->GetCostBreakdown()[j] = cost; + } + } + hyp->GetCostBreakdown()[0] -= sum; + hyp->GetCostBreakdown()[0] /= weights[scorers[0]->GetName()]; + } + bestHyps.push_back(hyp); + } + } + + private: + mutable thrust::device_vector<unsigned> keys; + mutable thrust::device_vector<float> Costs; + +}; + +} diff --git a/src/gpu/decoder/encoder_decoder.cu b/src/gpu/decoder/encoder_decoder.cu index 810f46cb..2330cc65 100644 --- a/src/gpu/decoder/encoder_decoder.cu +++ b/src/gpu/decoder/encoder_decoder.cu @@ -6,6 +6,7 @@ #include "gpu/mblas/matrix.h" #include "gpu/dl4mt/dl4mt.h" #include "gpu/decoder/encoder_decoder_state.h" +#include "gpu/decoder/best_hyps.h" using namespace std; @@ -141,5 +142,9 @@ ScorerPtr EncoderDecoderLoader::NewScorer(size_t taskId) { tab, *weights_[i])); } +BestHypsType EncoderDecoderLoader::GetBestHyps() { + return GPU::BestHyps(); +} + } diff --git a/src/gpu/decoder/encoder_decoder.h b/src/gpu/decoder/encoder_decoder.h index 9f555500..dad42b09 100644 --- a/src/gpu/decoder/encoder_decoder.h +++ b/src/gpu/decoder/encoder_decoder.h @@ -5,6 +5,7 @@ #include "common/scorer.h" #include "common/loader.h" +#include "common/base_best_hyps.h" #include "common/threadpool.h" #include <thrust/device_vector.h> @@ -73,6 +74,7 @@ class EncoderDecoderLoader : public Loader { virtual void Load(); virtual ScorerPtr NewScorer(size_t taskId); + virtual BestHypsType GetBestHyps(); private: std::vector<std::unique_ptr<Weights>> weights_; |