Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/marian.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTomasz Dwojak <t.dwojak@amu.edu.pl>2016-10-21 20:25:44 +0300
committerTomasz Dwojak <t.dwojak@amu.edu.pl>2016-11-01 21:07:04 +0300
commit5059b12aba0fcf5b520559891f4239604b7807a8 (patch)
treec25db197bb7cfb4aa6131c9a2171f68cce1b9aac
parente877b5760f1d9f04c6e8fb7e7b6b1a09fd9849c3 (diff)
Move BestHyps outside mblas::Matrix
-rw-r--r--CMakeLists.txt2
-rw-r--r--src/common/base_best_hyps.h11
-rw-r--r--src/common/base_matrix.h7
-rw-r--r--src/common/god.cpp9
-rw-r--r--src/common/god.h3
-rw-r--r--src/common/loader.h6
-rw-r--r--src/common/search.cpp10
-rw-r--r--src/common/search.h7
-rw-r--r--src/cpu/decoder/best_hyps.h139
-rw-r--r--src/cpu/decoder/encoder_decoder.cpp12
-rw-r--r--src/cpu/decoder/encoder_decoder.h14
-rw-r--r--src/cpu/decoder/encoder_decoder_loader.h2
-rw-r--r--src/cpu/mblas/matrix.cpp129
-rw-r--r--src/cpu/mblas/matrix.h12
-rw-r--r--src/gpu/decoder/ape_penalty.cu5
-rw-r--r--src/gpu/decoder/ape_penalty.h5
-rw-r--r--src/gpu/decoder/best_hyps.h168
-rw-r--r--src/gpu/decoder/encoder_decoder.cu5
-rw-r--r--src/gpu/decoder/encoder_decoder.h2
19 files changed, 377 insertions, 171 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index dd3a7a14..52af5b36 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,7 +5,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
project(amunn CXX)
set(CMAKE_CXX_FLAGS_RELEASE "-std=c++14 -fPIC -O3 -Ofast -m64 -flto -march=native -funroll-loops -ffinite-math-only -Wno-unused-result -Wno-deprecated -pthread")
-set(CMAKE_CXX_FLAGS_DEBUG "-std=c++14 -fPIC -g -m64 -flto -march=native -funroll-loops -ffinite-math-only -Wno-unused-result -Wno-deprecated -pthread")
+set(CMAKE_CXX_FLAGS_DEBUG "-std=c++14 -fPIC -g -O0 -m64 -flto -march=native -funroll-loops -ffinite-math-only -Wno-unused-result -Wno-deprecated -pthread")
set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE} -g -pg")
set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS_RELEASE})
diff --git a/src/common/base_best_hyps.h b/src/common/base_best_hyps.h
new file mode 100644
index 00000000..995b38c3
--- /dev/null
+++ b/src/common/base_best_hyps.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <functional>
+#include <vector>
+
+#include "common/types.h"
+#include "scorer.h"
+
+
+using BestHypsType = std::function<void(Beam&, const Beam&, const size_t,
+ const std::vector<ScorerPtr>&, const Words&, bool)>;
diff --git a/src/common/base_matrix.h b/src/common/base_matrix.h
index f12dacba..241b4440 100644
--- a/src/common/base_matrix.h
+++ b/src/common/base_matrix.h
@@ -20,12 +20,5 @@ class BaseMatrix {
virtual size_t Cols() const = 0;
virtual void Resize(size_t rows, size_t cols) = 0;
- virtual void BestHyps(Beam& bestHyps,
- const Beam& prevHyps,
- const size_t beamSize,
- const std::vector<ScorerPtr> &scorers,
- const Words &filterIndices,
- bool returnAlignment=false) const = 0;
-
virtual std::string Debug() const = 0;
};
diff --git a/src/common/god.cpp b/src/common/god.cpp
index de41bbaa..1d0923dd 100644
--- a/src/common/god.cpp
+++ b/src/common/god.cpp
@@ -186,6 +186,15 @@ std::vector<ScorerPtr> God::GetScorers(size_t threadId) {
return scorers;
}
+BestHypsType God::GetBestHyps(size_t threadId) {
+ size_t cpuThreads = God::Get<size_t>("cpu-threads");
+ if (threadId < cpuThreads) {
+ return Summon().cpuLoaders_.begin()->second->GetBestHyps();
+ } else {
+ return Summon().gpuLoaders_.begin()->second->GetBestHyps();
+ }
+}
+
std::vector<std::string> God::GetScorerNames() {
std::vector<std::string> scorerNames;
for(auto&& name : Summon().cpuLoaders_ | boost::adaptors::map_keys)
diff --git a/src/common/god.h b/src/common/god.h
index a974c86d..1a686f6a 100644
--- a/src/common/god.h
+++ b/src/common/god.h
@@ -8,6 +8,7 @@
#include "common/scorer.h"
#include "common/types.h"
#include "common/processor/processor.h"
+#include "common/base_best_hyps.h"
class Weights;
class Vocab;
@@ -45,6 +46,8 @@ class God {
static Filter& GetFilter();
+ static BestHypsType GetBestHyps(size_t threadId);
+
static std::vector<ScorerPtr> GetScorers(size_t);
static std::vector<std::string> GetScorerNames();
static std::map<std::string, float>& GetScorerWeights();
diff --git a/src/common/loader.h b/src/common/loader.h
index 970f190a..b3326d01 100644
--- a/src/common/loader.h
+++ b/src/common/loader.h
@@ -4,6 +4,7 @@
#include <yaml-cpp/yaml.h>
#include "scorer.h"
+#include "common/base_best_hyps.h"
class Loader {
public:
@@ -19,18 +20,19 @@ class Loader {
bool Has(const std::string& key) {
return config_[key];
}
-
+
template <typename T>
T Get(const std::string& key) {
return config_[key].as<T>();
}
virtual ScorerPtr NewScorer(size_t) = 0;
+ virtual BestHypsType GetBestHyps() = 0;
const std::string& GetName() {
return name_;
}
-
+
protected:
const std::string name_;
const YAML::Node config_;
diff --git a/src/common/search.cpp b/src/common/search.cpp
index 635798dc..5c8f4780 100644
--- a/src/common/search.cpp
+++ b/src/common/search.cpp
@@ -3,15 +3,19 @@
#include <boost/timer/timer.hpp>
#include "common/god.h"
+#include "common/history.h"
#include "common/filter.h"
#include "common/base_matrix.h"
using namespace std;
Search::Search(size_t threadId)
- : scorers_(God::GetScorers(threadId)) {}
+ : scorers_(God::GetScorers(threadId)),
+ BestHyps_(God::GetBestHyps(threadId)) {
+}
+
-size_t Search::MakeFilter(const Words& srcWords, const size_t vocabSize) {
+size_t Search::MakeFilter(const Words& srcWords, size_t vocabSize) {
filterIndices_ = God::GetFilter().GetFilteredVocab(srcWords, vocabSize);
for (size_t i = 0; i < scorers_.size(); i++) {
scorers_[i]->Filter(filterIndices_);
@@ -68,7 +72,7 @@ History Search::Decode(const Sentence& sentence) {
bool returnAlignment = God::Get<bool>("return-alignment");
- scorers_[0]->GetProbs().BestHyps(hyps, prevHyps, beamSize, scorers_, filterIndices_,
+ BestHyps_(hyps, prevHyps, beamSize, scorers_, filterIndices_,
returnAlignment);
history.Add(hyps, history.size() == maxLength);
diff --git a/src/common/search.h b/src/common/search.h
index 1a301d23..e0f3f815 100644
--- a/src/common/search.h
+++ b/src/common/search.h
@@ -4,7 +4,9 @@
#include "common/scorer.h"
#include "common/sentence.h"
-#include "common/history.h"
+#include "common/base_best_hyps.h"
+
+class History;
class Search {
public:
@@ -12,7 +14,8 @@ class Search {
History Decode(const Sentence& sentence);
private:
- size_t MakeFilter(const Words& srcWords, const size_t vocabSize);
+ size_t MakeFilter(const Words& srcWords, size_t vocabSize);
std::vector<ScorerPtr> scorers_;
Words filterIndices_;
+ BestHypsType BestHyps_;
};
diff --git a/src/cpu/decoder/best_hyps.h b/src/cpu/decoder/best_hyps.h
new file mode 100644
index 00000000..791ece68
--- /dev/null
+++ b/src/cpu/decoder/best_hyps.h
@@ -0,0 +1,139 @@
+#pragma once
+
+#include <vector>
+#include <boost/iterator/permutation_iterator.hpp>
+
+#include "common/scorer.h"
+#include "common/god.h"
+#include "common/exception.h"
+#include "cpu/mblas/matrix.h"
+
+namespace CPU {
+
+struct ProbCompare {
+ ProbCompare(const float* data) : data_(data) {}
+
+ bool operator()(const unsigned a, const unsigned b) {
+ return data_[a] > data_[b];
+ }
+
+ const float* data_;
+};
+
+void BestHyps(Beam& bestHyps,
+ const Beam& prevHyps,
+ const size_t beamSize,
+ const std::vector<ScorerPtr> &scorers,
+ const Words &filterIndices,
+ bool returnAlignment)
+{
+ using namespace mblas;
+
+ auto& weights = God::GetScorerWeights();
+
+ mblas::ArrayMatrix& Probs = static_cast<mblas::ArrayMatrix&>(scorers[0]->GetProbs());
+
+ mblas::ArrayMatrix Costs(Probs.rows(), 1);
+ for (size_t i = 0; i < prevHyps.size(); ++i) {
+ Costs.data()[i] = prevHyps[i]->GetCost();
+ }
+
+ Probs *= weights[scorers[0]->GetName()];
+ AddBiasVector<byColumn>(Probs, Costs);
+
+ for (size_t i = 1; i < scorers.size(); ++i) {
+ mblas::ArrayMatrix &currProb = static_cast<mblas::ArrayMatrix&>(scorers[i]->GetProbs());
+
+ Probs += weights[scorers[i]->GetName()] * currProb;
+ }
+
+ size_t size = Probs.rows() * Probs.columns(); // Probs.size();
+ std::vector<size_t> keys(size);
+ for (size_t i = 0; i < keys.size(); ++i) {
+ keys[i] = i;
+ }
+
+ std::vector<size_t> bestKeys(beamSize);
+ std::vector<float> bestCosts(beamSize);
+
+ if (!God::Get<bool>("allow-unk")) {
+ blaze::column(Probs, UNK) = std::numeric_limits<float>::lowest();
+ }
+
+ std::nth_element(keys.begin(), keys.begin() + beamSize, keys.end(),
+ ProbCompare(Probs.data()));
+
+ for (size_t i = 0; i < beamSize; ++i) {
+ bestKeys[i] = keys[i];
+ bestCosts[i] = Probs.data()[keys[i]];
+ }
+
+ std::vector<std::vector<float>> breakDowns;
+ bool doBreakdown = God::Get<bool>("n-best");
+ if (doBreakdown) {
+ breakDowns.push_back(bestCosts);
+ for (auto& scorer : scorers) {
+ std::vector<float> modelCosts(beamSize);
+ mblas::ArrayMatrix &currProb = static_cast<mblas::ArrayMatrix&>(scorer->GetProbs());
+
+ auto it = boost::make_permutation_iterator(currProb.begin(), keys.begin());
+ std::copy(it, it + beamSize, modelCosts.begin());
+ breakDowns.push_back(modelCosts);
+ }
+ }
+
+ bool filter = God::Get<std::vector<std::string>>("softmax-filter").size();
+
+ for (size_t i = 0; i < beamSize; i++) {
+ size_t wordIndex = bestKeys[i] % Probs.columns();
+
+ if (filter) {
+ wordIndex = filterIndices[wordIndex];
+ }
+
+ size_t hypIndex = bestKeys[i] / Probs.columns();
+ float cost = bestCosts[i];
+
+ HypothesisPtr hyp;
+ if (returnAlignment) {
+ std::vector<SoftAlignmentPtr> alignments;
+ for (auto& scorer : scorers) {
+ if (CPU::EncoderDecoder* encdec = dynamic_cast<CPU::EncoderDecoder*>(scorer.get())) {
+ auto& attention = encdec->GetAttention();
+ alignments.emplace_back(new SoftAlignment(attention.begin(hypIndex),
+ attention.end(hypIndex)));
+ } else {
+ UTIL_THROW2("Return Alignment is allowed only with Nematus scorer.");
+ }
+ }
+
+ hyp.reset(new Hypothesis(prevHyps[hypIndex], wordIndex, hypIndex, cost, alignments));
+ } else {
+ hyp.reset(new Hypothesis(prevHyps[hypIndex], wordIndex, hypIndex, cost));
+ }
+
+ if (doBreakdown) {
+ hyp->GetCostBreakdown().resize(scorers.size());
+ float sum = 0;
+ for(size_t j = 0; j < scorers.size(); ++j) {
+ if (j == 0) {
+ hyp->GetCostBreakdown()[0] = breakDowns[0][i];
+ } else {
+ float cost = 0;
+ if (j < scorers.size()) {
+ if (prevHyps[hypIndex]->GetCostBreakdown().size() < scorers.size())
+ const_cast<HypothesisPtr&>(prevHyps[hypIndex])->GetCostBreakdown().resize(scorers.size(), 0.0);
+ cost = breakDowns[j][i] + const_cast<HypothesisPtr&>(prevHyps[hypIndex])->GetCostBreakdown()[j];
+ }
+ sum += weights[scorers[j]->GetName()] * cost;
+ hyp->GetCostBreakdown()[j] = cost;
+ }
+ }
+ hyp->GetCostBreakdown()[0] -= sum;
+ hyp->GetCostBreakdown()[0] /= weights[scorers[0]->GetName()];
+ }
+ bestHyps.push_back(hyp);
+ }
+
+}
+}
diff --git a/src/cpu/decoder/encoder_decoder.cpp b/src/cpu/decoder/encoder_decoder.cpp
index e1cf4a95..f43d1860 100644
--- a/src/cpu/decoder/encoder_decoder.cpp
+++ b/src/cpu/decoder/encoder_decoder.cpp
@@ -1,18 +1,20 @@
-#include "encoder_decoder.h"
+#include "cpu/decoder/encoder_decoder.h"
+#include "cpu/decoder/encoder_decoder_loader.h"
#include <vector>
#include <yaml-cpp/yaml.h>
#include "common/threadpool.h"
-#include "../dl4mt/dl4mt.h"
+#include "cpu/dl4mt/dl4mt.h"
#include "common/god.h"
#include "common/loader.h"
#include "common/scorer.h"
#include "common/sentence.h"
-#include "../mblas/matrix.h"
+#include "cpu/mblas/matrix.h"
+#include "cpu/decoder/best_hyps.h"
using namespace std;
@@ -145,5 +147,9 @@ ScorerPtr EncoderDecoderLoader::NewScorer(const size_t) {
tab, *weights_[0]));
}
+BestHypsType EncoderDecoderLoader::GetBestHyps() {
+ return CPU::BestHyps;
+}
+
}
diff --git a/src/cpu/decoder/encoder_decoder.h b/src/cpu/decoder/encoder_decoder.h
index 153f8e2d..840c57d7 100644
--- a/src/cpu/decoder/encoder_decoder.h
+++ b/src/cpu/decoder/encoder_decoder.h
@@ -84,18 +84,4 @@ class EncoderDecoder : public Scorer {
mblas::Matrix SourceContext_;
};
-////////////////////////////////////////////////
-class EncoderDecoderLoader : public Loader {
- public:
- EncoderDecoderLoader(const std::string name,
- const YAML::Node& config);
-
- virtual void Load();
-
- virtual ScorerPtr NewScorer(const size_t taskId);
-
- private:
- std::vector<std::unique_ptr<Weights>> weights_;
-};
-
}
diff --git a/src/cpu/decoder/encoder_decoder_loader.h b/src/cpu/decoder/encoder_decoder_loader.h
index 7346b58e..0c98739c 100644
--- a/src/cpu/decoder/encoder_decoder_loader.h
+++ b/src/cpu/decoder/encoder_decoder_loader.h
@@ -7,6 +7,7 @@
#include "common/scorer.h"
#include "common/loader.h"
#include "common/logging.h"
+#include "common/base_best_hyps.h"
namespace CPU {
@@ -20,6 +21,7 @@ class EncoderDecoderLoader : public Loader {
virtual void Load();
virtual ScorerPtr NewScorer(const size_t taskId);
+ BestHypsType GetBestHyps();
private:
std::vector<std::unique_ptr<Weights>> weights_;
diff --git a/src/cpu/mblas/matrix.cpp b/src/cpu/mblas/matrix.cpp
index 64b5d330..22eae5ad 100644
--- a/src/cpu/mblas/matrix.cpp
+++ b/src/cpu/mblas/matrix.cpp
@@ -17,135 +17,6 @@ namespace CPU {
namespace mblas {
-/////////////////////////////////////////////////////////////////////
-struct ProbCompare {
- ProbCompare(const float* data) : data_(data) {}
-
- bool operator()(const unsigned a, const unsigned b) {
- return data_[a] > data_[b];
- }
-
- const float* data_;
-};
-
-/////////////////////////////////////////////////////////////////////
-void ArrayMatrix::BestHyps(Beam& bestHyps,
- const Beam& prevHyps,
- const size_t beamSize,
- const std::vector<ScorerPtr> &scorers,
- const Words &filterIndices,
- bool returnAlignment) const
-{
- using namespace mblas;
-
- auto& weights = God::GetScorerWeights();
-
- mblas::ArrayMatrix& Probs = static_cast<mblas::ArrayMatrix&>(scorers[0]->GetProbs());
-
- mblas::ArrayMatrix Costs(Probs.rows(), 1);
- for (size_t i = 0; i < prevHyps.size(); ++i) {
- Costs.data()[i] = prevHyps[i]->GetCost();
- }
-
- Probs *= weights[scorers[0]->GetName()];
- AddBiasVector<byColumn>(Probs, Costs);
-
- for (size_t i = 1; i < scorers.size(); ++i) {
- mblas::ArrayMatrix &currProb = static_cast<mblas::ArrayMatrix&>(scorers[i]->GetProbs());
-
- Probs += weights[scorers[i]->GetName()] * currProb;
- }
-
- size_t size = Probs.rows() * Probs.columns(); // Probs.size();
- std::vector<size_t> keys(size);
- for (size_t i = 0; i < keys.size(); ++i) {
- keys[i] = i;
- }
-
- std::vector<size_t> bestKeys(beamSize);
- std::vector<float> bestCosts(beamSize);
-
- if (!God::Get<bool>("allow-unk")) {
- blaze::column(Probs, UNK) = std::numeric_limits<float>::lowest();
- }
-
- std::nth_element(keys.begin(), keys.begin() + beamSize, keys.end(),
- ProbCompare(Probs.data()));
-
- for (size_t i = 0; i < beamSize; ++i) {
- bestKeys[i] = keys[i];
- bestCosts[i] = Probs.data()[keys[i]];
- }
-
- std::vector<HostVector<float>> breakDowns;
- bool doBreakdown = God::Get<bool>("n-best");
- if (doBreakdown) {
- breakDowns.push_back(bestCosts);
- for (auto& scorer : scorers) {
- HostVector<float> modelCosts(beamSize);
- mblas::ArrayMatrix &currProb = static_cast<mblas::ArrayMatrix&>(scorer->GetProbs());
-
- auto it = boost::make_permutation_iterator(currProb.begin(), keys.begin());
- std::copy(it, it + beamSize, modelCosts.begin());
- breakDowns.push_back(modelCosts);
- }
- }
-
- bool filter = God::Get<std::vector<std::string>>("softmax-filter").size();
-
- for (size_t i = 0; i < beamSize; i++) {
- size_t wordIndex = bestKeys[i] % Probs.columns();
-
- if (filter) {
- wordIndex = filterIndices[wordIndex];
- }
-
- size_t hypIndex = bestKeys[i] / Probs.columns();
- float cost = bestCosts[i];
-
- HypothesisPtr hyp;
- if (returnAlignment) {
- std::vector<SoftAlignmentPtr> alignments;
- for (auto& scorer : scorers) {
- if (CPU::EncoderDecoder* encdec = dynamic_cast<CPU::EncoderDecoder*>(scorer.get())) {
- auto& attention = encdec->GetAttention();
- alignments.emplace_back(new SoftAlignment(attention.begin(hypIndex),
- attention.end(hypIndex)));
- } else {
- UTIL_THROW2("Return Alignment is allowed only with Nematus scorer.");
- }
- }
-
- hyp.reset(new Hypothesis(prevHyps[hypIndex], wordIndex, hypIndex, cost, alignments));
- } else {
- hyp.reset(new Hypothesis(prevHyps[hypIndex], wordIndex, hypIndex, cost));
- }
-
- if (doBreakdown) {
- hyp->GetCostBreakdown().resize(scorers.size());
- float sum = 0;
- for(size_t j = 0; j < scorers.size(); ++j) {
- if (j == 0) {
- hyp->GetCostBreakdown()[0] = breakDowns[0][i];
- } else {
- float cost = 0;
- if (j < scorers.size()) {
- if (prevHyps[hypIndex]->GetCostBreakdown().size() < scorers.size())
- const_cast<HypothesisPtr&>(prevHyps[hypIndex])->GetCostBreakdown().resize(scorers.size(), 0.0);
- cost = breakDowns[j][i] + const_cast<HypothesisPtr&>(prevHyps[hypIndex])->GetCostBreakdown()[j];
- }
- sum += weights[scorers[j]->GetName()] * cost;
- hyp->GetCostBreakdown()[j] = cost;
- }
- }
- hyp->GetCostBreakdown()[0] -= sum;
- hyp->GetCostBreakdown()[0] /= weights[scorers[0]->GetName()];
- }
- bestHyps.push_back(hyp);
- }
-}
-
-
}
}
diff --git a/src/cpu/mblas/matrix.h b/src/cpu/mblas/matrix.h
index 1eb74323..b42db9bc 100644
--- a/src/cpu/mblas/matrix.h
+++ b/src/cpu/mblas/matrix.h
@@ -129,17 +129,11 @@ class ArrayMatrix : public BlazeMatrix<float, blaze::rowMajor>
: Parent(rows, columns, val)
{}
- template <class MT>
- ArrayMatrix(const MT& rhs)
- :Parent(rhs)
+ template <class MT>
+ ArrayMatrix(const MT& rhs)
+ : Parent(rhs)
{}
- virtual void BestHyps(Beam& bestHyps, const Beam& prevHyps,
- const size_t beamSize,
- const std::vector<ScorerPtr> &scorers,
- const Words &filterIndices,
- bool returnAlignment) const;
-
};
////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/decoder/ape_penalty.cu b/src/gpu/decoder/ape_penalty.cu
index 639a8e63..3cdd052d 100644
--- a/src/gpu/decoder/ape_penalty.cu
+++ b/src/gpu/decoder/ape_penalty.cu
@@ -2,6 +2,7 @@
#include "common/god.h"
#include "common/vocab.h"
#include "gpu/types-gpu.h"
+#include "gpu/decoder/best_hyps.h"
namespace GPU {
@@ -90,5 +91,9 @@ ScorerPtr ApePenaltyLoader::NewScorer(size_t taskId) {
srcTrgMap_, penalties_));
}
+BestHypsType ApePenaltyLoader::GetBestHyps() {
+ return GPU::BestHyps();
+}
+
}
diff --git a/src/gpu/decoder/ape_penalty.h b/src/gpu/decoder/ape_penalty.h
index 662da558..992d987d 100644
--- a/src/gpu/decoder/ape_penalty.h
+++ b/src/gpu/decoder/ape_penalty.h
@@ -5,9 +5,11 @@
#include "common/types.h"
#include "common/file_stream.h"
#include "common/scorer.h"
-#include "gpu/mblas/matrix.h"
+#include "common/base_best_hyps.h"
#include "common/loader.h"
+#include "gpu/mblas/matrix.h"
+
namespace GPU {
typedef std::vector<Word> SrcTrgMap;
@@ -67,6 +69,7 @@ class ApePenaltyLoader : public Loader {
virtual void Load();
virtual ScorerPtr NewScorer(size_t taskId);
+ virtual BestHypsType GetBestHyps();
private:
SrcTrgMap srcTrgMap_;
diff --git a/src/gpu/decoder/best_hyps.h b/src/gpu/decoder/best_hyps.h
new file mode 100644
index 00000000..d7529f0d
--- /dev/null
+++ b/src/gpu/decoder/best_hyps.h
@@ -0,0 +1,168 @@
+#pragma once
+
+#include "common/scorer.h"
+#include "gpu/mblas/matrix.h"
+
+#include <thrust/device_vector.h>
+#include <thrust/functional.h>
+#include <thrust/execution_policy.h>
+
+namespace GPU {
+
+struct ProbCompare {
+ ProbCompare(const float* data) : data_(data) {}
+
+ __host__ __device__
+ bool operator()(const unsigned a, const unsigned b) {
+ return data_[a] > data_[b];
+ }
+
+ const float* data_;
+};
+
+class BestHyps {
+ public:
+ void operator()(Beam& bestHyps,
+ const Beam& prevHyps,
+ const size_t beamSize,
+ const std::vector<ScorerPtr>& scorers,
+ const Words& filterIndices,
+ bool returnAlignment) {
+ using namespace mblas;
+
+ auto& weights = God::GetScorerWeights();
+
+ mblas::Matrix& Probs = static_cast<mblas::Matrix&>(scorers[0]->GetProbs());
+
+ Costs.reserve(Probs.Rows());
+ HostVector<float> vCosts;
+ for (auto& h : prevHyps) {
+ vCosts.push_back(h->GetCost());
+ }
+ thrust::copy(thrust::cuda::par.on(Matrix::GetStream()),
+ vCosts.begin(), vCosts.end(), Costs.begin());
+
+ BroadcastVecColumn(weights[scorers[0]->GetName()] * _1 + _2,
+ Probs, Costs);
+ for (size_t i = 1; i < scorers.size(); ++i) {
+ mblas::Matrix &currProbs = static_cast<mblas::Matrix&>(scorers[i]->GetProbs());
+
+ Element(_1 + weights[scorers[i]->GetName()] * _2,
+ Probs, currProbs);
+ }
+
+ keys.resize(Probs.size());
+ thrust::host_vector<unsigned> bestKeys(beamSize);
+ thrust::host_vector<float> bestCosts(beamSize);
+
+ // @TODO: make this more efficient
+ if (!God::Get<bool>("allow-unk")) {
+ for (size_t i = 0; i < Probs.Rows(); i++)
+ Probs.Set(i, UNK, std::numeric_limits<float>::lowest());
+ }
+
+ // @TODO: Here we need to have a partial sort
+ if (beamSize < 10) {
+ for (size_t i = 0; i < beamSize; ++i) {
+ DeviceVector<float>::iterator iter =
+ thrust::max_element(thrust::cuda::par.on(Matrix::GetStream()),
+ Probs.begin(), Probs.end());
+ bestKeys[i] = iter - Probs.begin();
+ bestCosts[i] = *iter;
+ *iter = std::numeric_limits<float>::lowest();
+ }
+ algo::copy(thrust::cuda::par.on(Matrix::GetStream()),
+ bestKeys.begin(), bestKeys.end(), keys.begin());
+ }
+ else {
+    // these two functions do not have equivalents
+    // in the standard library or boost, keeping thrust
+    // namespace for now
+ thrust::sequence(thrust::cuda::par.on(Matrix::GetStream()),
+ keys.begin(), keys.end());
+ thrust::sort_by_key(thrust::cuda::par.on(Matrix::GetStream()), Probs.begin(), Probs.end(),
+ keys.begin(), algo::greater<float>());
+
+ thrust::copy_n(thrust::cuda::par.on(Matrix::GetStream()),
+ keys.begin(), beamSize, bestKeys.begin());
+ thrust::copy_n(thrust::cuda::par.on(Matrix::GetStream()),
+ Probs.begin(), beamSize, bestCosts.begin());
+ }
+
+
+ std::vector<HostVector<float>> breakDowns;
+ bool doBreakdown = God::Get<bool>("n-best");
+ if (doBreakdown) {
+ breakDowns.push_back(bestCosts);
+ for (size_t i = 1; i < scorers.size(); ++i) {
+ HostVector<float> modelCosts(beamSize);
+ mblas::Matrix &currProbs = static_cast<mblas::Matrix&>(scorers[i]->GetProbs());
+
+ auto it = iteralgo::make_permutation_iterator(currProbs.begin(), keys.begin());
+ algo::copy(thrust::cuda::par.on(Matrix::GetStream()),
+ it, it + beamSize, modelCosts.begin());
+ breakDowns.push_back(modelCosts);
+ }
+ }
+
+ bool filter = God::Get<std::vector<std::string>>("softmax-filter").size();
+
+ for (size_t i = 0; i < beamSize; i++) {
+ size_t wordIndex = bestKeys[i] % Probs.Cols();
+ if (filter) {
+ wordIndex = filterIndices[wordIndex];
+ }
+
+ size_t hypIndex = bestKeys[i] / Probs.Cols();
+ float cost = bestCosts[i];
+
+ HypothesisPtr hyp;
+ if (returnAlignment) {
+ std::vector<SoftAlignmentPtr> alignments;
+ for (auto& scorer : scorers) {
+ if (GPU::EncoderDecoder* encdec = dynamic_cast<GPU::EncoderDecoder*>(scorer.get())) {
+ auto& attention = encdec->GetAttention();
+ size_t attLength = attention.Cols();
+
+ alignments.emplace_back(new SoftAlignment(attention.begin() + hypIndex * attLength,
+ attention.begin() + (hypIndex + 1) * attLength));
+ } else {
+ UTIL_THROW2("Return Alignment is allowed only with Nematus scorer.");
+ }
+ }
+ hyp.reset(new Hypothesis(prevHyps[hypIndex], wordIndex, hypIndex, cost, alignments));
+ } else {
+ hyp.reset(new Hypothesis(prevHyps[hypIndex], wordIndex, hypIndex, cost));
+ }
+
+ if(doBreakdown) {
+ hyp->GetCostBreakdown().resize(scorers.size());
+ float sum = 0;
+ for (size_t j = 0; j < scorers.size(); ++j) {
+ if (j == 0)
+ hyp->GetCostBreakdown()[0] = breakDowns[0][i];
+ else {
+ float cost = 0;
+ if (j < scorers.size()) {
+ if(prevHyps[hypIndex]->GetCostBreakdown().size() < scorers.size())
+ const_cast<HypothesisPtr&>(prevHyps[hypIndex])->GetCostBreakdown().resize(scorers.size(), 0.0);
+ cost = breakDowns[j][i] + const_cast<HypothesisPtr&>(prevHyps[hypIndex])->GetCostBreakdown()[j];
+ }
+ sum += weights[scorers[j]->GetName()] * cost;
+ hyp->GetCostBreakdown()[j] = cost;
+ }
+ }
+ hyp->GetCostBreakdown()[0] -= sum;
+ hyp->GetCostBreakdown()[0] /= weights[scorers[0]->GetName()];
+ }
+ bestHyps.push_back(hyp);
+ }
+ }
+
+ private:
+ mutable thrust::device_vector<unsigned> keys;
+ mutable thrust::device_vector<float> Costs;
+
+};
+
+}
diff --git a/src/gpu/decoder/encoder_decoder.cu b/src/gpu/decoder/encoder_decoder.cu
index 810f46cb..2330cc65 100644
--- a/src/gpu/decoder/encoder_decoder.cu
+++ b/src/gpu/decoder/encoder_decoder.cu
@@ -6,6 +6,7 @@
#include "gpu/mblas/matrix.h"
#include "gpu/dl4mt/dl4mt.h"
#include "gpu/decoder/encoder_decoder_state.h"
+#include "gpu/decoder/best_hyps.h"
using namespace std;
@@ -141,5 +142,9 @@ ScorerPtr EncoderDecoderLoader::NewScorer(size_t taskId) {
tab, *weights_[i]));
}
+BestHypsType EncoderDecoderLoader::GetBestHyps() {
+ return GPU::BestHyps();
+}
+
}
diff --git a/src/gpu/decoder/encoder_decoder.h b/src/gpu/decoder/encoder_decoder.h
index 9f555500..dad42b09 100644
--- a/src/gpu/decoder/encoder_decoder.h
+++ b/src/gpu/decoder/encoder_decoder.h
@@ -5,6 +5,7 @@
#include "common/scorer.h"
#include "common/loader.h"
+#include "common/base_best_hyps.h"
#include "common/threadpool.h"
#include <thrust/device_vector.h>
@@ -73,6 +74,7 @@ class EncoderDecoderLoader : public Loader {
virtual void Load();
virtual ScorerPtr NewScorer(size_t taskId);
+ virtual BestHypsType GetBestHyps();
private:
std::vector<std::unique_ptr<Weights>> weights_;