Move BestHyps outside mblas::Matrix

author: Tomasz Dwojak <t.dwojak@amu.edu.pl> 2016-10-21 20:25:44 +0300
committer: Tomasz Dwojak <t.dwojak@amu.edu.pl> 2016-11-01 21:07:04 +0300
commit: 5059b12aba0fcf5b520559891f4239604b7807a8 (patch)
tree: c25db197bb7cfb4aa6131c9a2171f68cce1b9aac
parent: e877b5760f1d9f04c6e8fb7e7b6b1a09fd9849c3 (diff)
19 files changed, 377 insertions, 171 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index dd3a7a14..52af5b36 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,7 +5,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
 project(amunn CXX)
 
 set(CMAKE_CXX_FLAGS_RELEASE "-std=c++14 -fPIC -O3 -Ofast -m64 -flto -march=native -funroll-loops -ffinite-math-only -Wno-unused-result -Wno-deprecated -pthread")
-set(CMAKE_CXX_FLAGS_DEBUG "-std=c++14 -fPIC -g -m64 -flto -march=native -funroll-loops -ffinite-math-only -Wno-unused-result -Wno-deprecated -pthread")
+set(CMAKE_CXX_FLAGS_DEBUG "-std=c++14 -fPIC -g -O0 -m64 -flto -march=native -funroll-loops -ffinite-math-only -Wno-unused-result -Wno-deprecated -pthread")
 set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE} -g -pg")
 set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS_RELEASE})
 
diff --git a/src/common/base_best_hyps.h b/src/common/base_best_hyps.h
new file mode 100644
index 00000000..995b38c3
--- /dev/null
+++ b/src/common/base_best_hyps.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <functional>
+#include <vector>
+
+#include "common/types.h"
+#include "scorer.h"
+
+
+using BestHypsType = std::function<void(Beam&, const Beam&, const size_t,  // output beam, previous beam, beam size,
+                    const std::vector<ScorerPtr>&, const Words&, bool)>;  // scorers, filter indices, return-alignment flag
diff --git a/src/common/base_matrix.h b/src/common/base_matrix.h
index f12dacba..241b4440 100644
--- a/src/common/base_matrix.h
+++ b/src/common/base_matrix.h
@@ -20,12 +20,5 @@ class BaseMatrix {
     virtual size_t Cols() const = 0;
     virtual void Resize(size_t rows, size_t cols) = 0;
 
-    virtual void BestHyps(Beam& bestHyps,
-        const Beam& prevHyps,
-        const size_t beamSize,
-        const std::vector<ScorerPtr> &scorers,
-        const Words &filterIndices,
-        bool returnAlignment=false) const = 0;
-
     virtual std::string Debug() const = 0;
 };
diff --git a/src/common/god.cpp b/src/common/god.cpp
index de41bbaa..1d0923dd 100644
--- a/src/common/god.cpp
+++ b/src/common/god.cpp
@@ -186,6 +186,15 @@ std::vector<ScorerPtr> God::GetScorers(size_t threadId) {
   return scorers;
 }
 
+BestHypsType God::GetBestHyps(size_t threadId) {
+  // Ids below "cpu-threads" take the first CPU loader, all others the first GPU one.
+  const bool useCpu = threadId < God::Get<size_t>("cpu-threads");
+  if (useCpu) {
+    return Summon().cpuLoaders_.begin()->second->GetBestHyps();
+  }
+  return Summon().gpuLoaders_.begin()->second->GetBestHyps();
+}
+
 std::vector<std::string> God::GetScorerNames() {
   std::vector<std::string> scorerNames;
   for(auto&& name : Summon().cpuLoaders_ | boost::adaptors::map_keys)
diff --git a/src/common/god.h b/src/common/god.h
index a974c86d..1a686f6a 100644
--- a/src/common/god.h
+++ b/src/common/god.h
@@ -8,6 +8,7 @@
 #include "common/scorer.h"
 #include "common/types.h"
 #include "common/processor/processor.h"
+#include "common/base_best_hyps.h"
 
 class Weights;
 class Vocab;
@@ -45,6 +46,8 @@ class God {
 
     static Filter& GetFilter();
 
+    static BestHypsType GetBestHyps(size_t threadId);
+
     static std::vector<ScorerPtr> GetScorers(size_t);
     static std::vector<std::string> GetScorerNames();
     static std::map<std::string, float>& GetScorerWeights();
diff --git a/src/common/loader.h b/src/common/loader.h
index 970f190a..b3326d01 100644
--- a/src/common/loader.h
+++ b/src/common/loader.h
@@ -4,6 +4,7 @@
 #include <yaml-cpp/yaml.h>
 
 #include "scorer.h"
+#include "common/base_best_hyps.h"
 
 class Loader {
   public:
@@ -19,18 +20,19 @@ class Loader {
     bool Has(const std::string& key) {
       return config_[key];
     }
-    
+
     template <typename T>
     T Get(const std::string& key) {
       return config_[key].as<T>();
     }
 
     virtual ScorerPtr NewScorer(size_t) = 0;
+    virtual BestHypsType GetBestHyps() = 0;
 
     const std::string& GetName() {
       return name_;
     }
-    
+
   protected:
     const std::string name_;
     const YAML::Node config_;
diff --git a/src/common/search.cpp b/src/common/search.cpp
index 635798dc..5c8f4780 100644
--- a/src/common/search.cpp
+++ b/src/common/search.cpp
@@ -3,15 +3,19 @@
 #include <boost/timer/timer.hpp>
 
 #include "common/god.h"
+#include "common/history.h"
 #include "common/filter.h"
 #include "common/base_matrix.h"
 
 using namespace std;
 
 Search::Search(size_t threadId)
-  : scorers_(God::GetScorers(threadId)) {}
+  : scorers_(God::GetScorers(threadId)),
+    BestHyps_(God::GetBestHyps(threadId)) {
+}
+
 
-size_t Search::MakeFilter(const Words& srcWords, const size_t vocabSize) {
+size_t Search::MakeFilter(const Words& srcWords, size_t vocabSize) {
   filterIndices_ = God::GetFilter().GetFilteredVocab(srcWords, vocabSize);
   for (size_t i = 0; i < scorers_.size(); i++) {
       scorers_[i]->Filter(filterIndices_);
@@ -68,7 +72,7 @@ History Search::Decode(const Sentence& sentence) {
 
     bool returnAlignment = God::Get<bool>("return-alignment");
 
-    scorers_[0]->GetProbs().BestHyps(hyps, prevHyps, beamSize, scorers_, filterIndices_,
+    BestHyps_(hyps, prevHyps, beamSize, scorers_, filterIndices_,
                                      returnAlignment);
     history.Add(hyps, history.size() == maxLength);
 
diff --git a/src/common/search.h b/src/common/search.h
index 1a301d23..e0f3f815 100644
--- a/src/common/search.h
+++ b/src/common/search.h
@@ -4,7 +4,9 @@
 
 #include "common/scorer.h"
 #include "common/sentence.h"
-#include "common/history.h"
+#include "common/base_best_hyps.h"
+
+class History;
 
 class Search {
   public:
@@ -12,7 +14,8 @@ class Search {
     History Decode(const Sentence& sentence);
 
   private:
-    size_t MakeFilter(const Words& srcWords, const size_t vocabSize);
+    size_t MakeFilter(const Words& srcWords, size_t vocabSize);
     std::vector<ScorerPtr> scorers_;
     Words filterIndices_;
+    BestHypsType BestHyps_;
 };
diff --git a/src/cpu/decoder/best_hyps.h b/src/cpu/decoder/best_hyps.h
new file mode 100644
index 00000000..791ece68
--- /dev/null
+++ b/src/cpu/decoder/best_hyps.h
@@ -0,0 +1,139 @@
+#pragma once
+
+#include <vector>
+#include <boost/iterator/permutation_iterator.hpp>
+
+#include "common/scorer.h"
+#include "common/god.h"
+#include "common/exception.h"
+#include "cpu/mblas/matrix.h"
+
+namespace CPU {
+
+struct ProbCompare {  // orders flat matrix indices so that larger data_[index] comes first
+  ProbCompare(const float* data) : data_(data) {}
+
+  bool operator()(const size_t a, const size_t b) const {  // size_t matches the element type of the key vector
+    return data_[a] > data_[b];
+  }
+
+  const float* data_;
+};
+// Appends to bestHyps the beamSize best (previous hypothesis, word) expansions over all scorers.
+inline void BestHyps(Beam& bestHyps,        // out: new hypotheses are push_back'ed here
+    const Beam& prevHyps,                   // hypotheses of the previous step, indexed by matrix row
+    const size_t beamSize,                  // number of hypotheses to produce
+    const std::vector<ScorerPtr> &scorers,  // scorers whose GetProbs() matrices are combined
+    const Words &filterIndices,             // column index -> word id, used when "softmax-filter" is set
+    bool returnAlignment)                   // when true, attach soft alignments to every new hypothesis
+{  // inline: the definition lives in a header and must not violate the one-definition rule
+  using namespace mblas;
+
+  auto& weights = God::GetScorerWeights();
+
+  mblas::ArrayMatrix& Probs = static_cast<mblas::ArrayMatrix&>(scorers[0]->GetProbs());
+  // Column vector holding the accumulated cost of each previous hypothesis.
+  mblas::ArrayMatrix Costs(Probs.rows(), 1);
+  for (size_t i = 0; i < prevHyps.size(); ++i) {
+    Costs.data()[i] = prevHyps[i]->GetCost();
+  }
+  // Probs becomes (in place) the weighted sum of all scorer outputs plus the previous costs.
+  Probs *= weights[scorers[0]->GetName()];
+  AddBiasVector<byColumn>(Probs, Costs);
+
+  for (size_t i = 1; i < scorers.size(); ++i) {
+    mblas::ArrayMatrix &currProb = static_cast<mblas::ArrayMatrix&>(scorers[i]->GetProbs());
+
+    Probs += weights[scorers[i]->GetName()] * currProb;
+  }
+  // keys enumerates every cell of Probs as a flat index (row * columns + column).
+  size_t size = Probs.rows() * Probs.columns(); // Probs.size();
+  std::vector<size_t> keys(size);
+  for (size_t i = 0; i < keys.size(); ++i) {
+    keys[i] = i;
+  }
+
+  std::vector<size_t> bestKeys(beamSize);
+  std::vector<float> bestCosts(beamSize);
+  // Unless unknown words are allowed, push the UNK column to the lowest possible score.
+  if (!God::Get<bool>("allow-unk")) {
+    blaze::column(Probs, UNK) = std::numeric_limits<float>::lowest();
+  }
+  // Partition keys so the beamSize best-scoring cells come first (their mutual order is unspecified).
+  std::nth_element(keys.begin(), keys.begin() + beamSize, keys.end(),
+                   ProbCompare(Probs.data()));
+
+  for (size_t i = 0; i < beamSize; ++i) {
+    bestKeys[i] = keys[i];
+    bestCosts[i] = Probs.data()[keys[i]];
+  }
+  // breakDowns[0] holds the combined costs; breakDowns[j] (j >= 1) the raw scores of scorers[j].
+  std::vector<std::vector<float>> breakDowns;
+  bool doBreakdown = God::Get<bool>("n-best");
+  if (doBreakdown) {
+    breakDowns.push_back(bestCosts);
+    for (size_t s = 1; s < scorers.size(); ++s) {  // start at 1 so breakDowns[j] lines up with scorers[j]
+      std::vector<float> modelCosts(beamSize);
+      mblas::ArrayMatrix &currProb = static_cast<mblas::ArrayMatrix&>(scorers[s]->GetProbs());
+
+      auto it = boost::make_permutation_iterator(currProb.begin(), keys.begin());
+      std::copy(it, it + beamSize, modelCosts.begin());
+      breakDowns.push_back(modelCosts);
+    }
+  }
+
+  bool filter = God::Get<std::vector<std::string>>("softmax-filter").size();
+
+  for (size_t i = 0; i < beamSize; i++) {
+    size_t wordIndex = bestKeys[i] % Probs.columns();
+
+    if (filter) {
+      wordIndex = filterIndices[wordIndex];
+    }
+
+    size_t hypIndex  = bestKeys[i] / Probs.columns();
+    float cost = bestCosts[i];
+
+    HypothesisPtr hyp;
+    if (returnAlignment) {
+      std::vector<SoftAlignmentPtr> alignments;
+      for (auto& scorer : scorers) {
+        if (CPU::EncoderDecoder* encdec = dynamic_cast<CPU::EncoderDecoder*>(scorer.get())) {
+          auto& attention = encdec->GetAttention();
+          alignments.emplace_back(new SoftAlignment(attention.begin(hypIndex),
+                                                    attention.end(hypIndex)));
+        } else {
+          UTIL_THROW2("Return Alignment is allowed only with Nematus scorer.");
+        }
+      }
+
+      hyp.reset(new Hypothesis(prevHyps[hypIndex], wordIndex, hypIndex, cost, alignments));
+    } else {
+      hyp.reset(new Hypothesis(prevHyps[hypIndex], wordIndex, hypIndex, cost));
+    }
+
+    if (doBreakdown) {
+      hyp->GetCostBreakdown().resize(scorers.size());
+      float sum = 0;
+      for(size_t j = 0; j < scorers.size(); ++j) {
+        if (j == 0) {
+          hyp->GetCostBreakdown()[0] = breakDowns[0][i];
+        } else {
+          float cost = 0;
+          if (j < scorers.size()) {
+            if (prevHyps[hypIndex]->GetCostBreakdown().size() < scorers.size())
+              const_cast<HypothesisPtr&>(prevHyps[hypIndex])->GetCostBreakdown().resize(scorers.size(), 0.0);
+            cost = breakDowns[j][i] + const_cast<HypothesisPtr&>(prevHyps[hypIndex])->GetCostBreakdown()[j];
+          }
+          sum += weights[scorers[j]->GetName()] * cost;
+          hyp->GetCostBreakdown()[j] = cost;
+        }
+      }
+      hyp->GetCostBreakdown()[0] -= sum;  // slot 0: combined cost minus the other scorers' weighted shares
+      hyp->GetCostBreakdown()[0] /= weights[scorers[0]->GetName()];
+    }
+    bestHyps.push_back(hyp);
+  }
+
+}
+}
diff --git a/src/cpu/decoder/encoder_decoder.cpp b/src/cpu/decoder/encoder_decoder.cpp
index e1cf4a95..f43d1860 100644
--- a/src/cpu/decoder/encoder_decoder.cpp
+++ b/src/cpu/decoder/encoder_decoder.cpp
@@ -1,18 +1,20 @@
-#include "encoder_decoder.h"
+#include "cpu/decoder/encoder_decoder.h"
+#include "cpu/decoder/encoder_decoder_loader.h"
 
 #include <vector>
 #include <yaml-cpp/yaml.h>
 
 #include "common/threadpool.h"
 
-#include "../dl4mt/dl4mt.h"
+#include "cpu/dl4mt/dl4mt.h"
 
 #include "common/god.h"
 #include "common/loader.h"
 #include "common/scorer.h"
 #include "common/sentence.h"
 
-#include "../mblas/matrix.h"
+#include "cpu/mblas/matrix.h"
+#include "cpu/decoder/best_hyps.h"
 
 using namespace std;
 
@@ -145,5 +147,9 @@ ScorerPtr EncoderDecoderLoader::NewScorer(const size_t) {
                                       tab, *weights_[0]));
 }
 
+BestHypsType EncoderDecoderLoader::GetBestHyps() {
+  return BestHypsType(&CPU::BestHyps);  // hand out the CPU free function as the selection routine
+}
+
 }
 
diff --git a/src/cpu/decoder/encoder_decoder.h b/src/cpu/decoder/encoder_decoder.h
index 153f8e2d..840c57d7 100644
--- a/src/cpu/decoder/encoder_decoder.h
+++ b/src/cpu/decoder/encoder_decoder.h
@@ -84,18 +84,4 @@ class EncoderDecoder : public Scorer {
     mblas::Matrix SourceContext_;
 };
 
-////////////////////////////////////////////////
-class EncoderDecoderLoader : public Loader {
-  public:
-    EncoderDecoderLoader(const std::string name,
-                         const YAML::Node& config);
-
-    virtual void Load();
-
-    virtual ScorerPtr NewScorer(const size_t taskId);
-
-  private:
-    std::vector<std::unique_ptr<Weights>> weights_;
-};
-
 }
diff --git a/src/cpu/decoder/encoder_decoder_loader.h b/src/cpu/decoder/encoder_decoder_loader.h
index 7346b58e..0c98739c 100644
--- a/src/cpu/decoder/encoder_decoder_loader.h
+++ b/src/cpu/decoder/encoder_decoder_loader.h
@@ -7,6 +7,7 @@
 #include "common/scorer.h"
 #include "common/loader.h"
 #include "common/logging.h"
+#include "common/base_best_hyps.h"
 
 namespace CPU {
 
@@ -20,6 +21,7 @@ class EncoderDecoderLoader : public Loader {
     virtual void Load();
 
     virtual ScorerPtr NewScorer(const size_t taskId);
+    BestHypsType GetBestHyps();
 
   private:
     std::vector<std::unique_ptr<Weights>> weights_;
diff --git a/src/cpu/mblas/matrix.cpp b/src/cpu/mblas/matrix.cpp
index 64b5d330..22eae5ad 100644
--- a/src/cpu/mblas/matrix.cpp
+++ b/src/cpu/mblas/matrix.cpp
@@ -17,135 +17,6 @@ namespace CPU {
 
 namespace mblas {
 
-/////////////////////////////////////////////////////////////////////
-struct ProbCompare {
-  ProbCompare(const float* data) : data_(data) {}
-
-  bool operator()(const unsigned a, const unsigned b) {
-    return data_[a] > data_[b];
-  }
-
-  const float* data_;
-};
-
-/////////////////////////////////////////////////////////////////////
-void ArrayMatrix::BestHyps(Beam& bestHyps,
-    const Beam& prevHyps,
-		const size_t beamSize,
-		const std::vector<ScorerPtr> &scorers,
-		const Words &filterIndices,
-    bool returnAlignment) const
-{
-  using namespace mblas;
-
-  auto& weights = God::GetScorerWeights();
-
-  mblas::ArrayMatrix& Probs = static_cast<mblas::ArrayMatrix&>(scorers[0]->GetProbs());
-
-  mblas::ArrayMatrix Costs(Probs.rows(), 1);
-  for (size_t i = 0; i < prevHyps.size(); ++i) {
-    Costs.data()[i] = prevHyps[i]->GetCost();
-  }
-
-  Probs *= weights[scorers[0]->GetName()];
-  AddBiasVector<byColumn>(Probs, Costs);
-
-  for (size_t i = 1; i < scorers.size(); ++i) {
-	  mblas::ArrayMatrix &currProb = static_cast<mblas::ArrayMatrix&>(scorers[i]->GetProbs());
-
-	  Probs += weights[scorers[i]->GetName()] * currProb;
-  }
-
-  size_t size = Probs.rows() * Probs.columns(); // Probs.size();
-  std::vector<size_t> keys(size);
-  for (size_t i = 0; i < keys.size(); ++i) {
-    keys[i] = i;
-  }
-
-  std::vector<size_t> bestKeys(beamSize);
-  std::vector<float> bestCosts(beamSize);
-
-  if (!God::Get<bool>("allow-unk")) {
-    blaze::column(Probs, UNK) = std::numeric_limits<float>::lowest();
-  }
-
-  std::nth_element(keys.begin(), keys.begin() + beamSize, keys.end(),
-		           ProbCompare(Probs.data()));
-
-  for (size_t i = 0; i < beamSize; ++i) {
-    bestKeys[i] = keys[i];
-    bestCosts[i] = Probs.data()[keys[i]];
-  }
-
-  std::vector<HostVector<float>> breakDowns;
-  bool doBreakdown = God::Get<bool>("n-best");
-  if (doBreakdown) {
-    breakDowns.push_back(bestCosts);
-    for (auto& scorer : scorers) {
-      HostVector<float> modelCosts(beamSize);
-      mblas::ArrayMatrix &currProb = static_cast<mblas::ArrayMatrix&>(scorer->GetProbs());
-
-      auto it = boost::make_permutation_iterator(currProb.begin(), keys.begin());
-      std::copy(it, it + beamSize, modelCosts.begin());
-      breakDowns.push_back(modelCosts);
-    }
-  }
-
-  bool filter = God::Get<std::vector<std::string>>("softmax-filter").size();
-
-  for (size_t i = 0; i < beamSize; i++) {
-    size_t wordIndex = bestKeys[i] % Probs.columns();
-
-    if (filter) {
-      wordIndex = filterIndices[wordIndex];
-    }
-
-    size_t hypIndex  = bestKeys[i] / Probs.columns();
-    float cost = bestCosts[i];
-
-    HypothesisPtr hyp;
-    if (returnAlignment) {
-      std::vector<SoftAlignmentPtr> alignments;
-      for (auto& scorer : scorers) {
-        if (CPU::EncoderDecoder* encdec = dynamic_cast<CPU::EncoderDecoder*>(scorer.get())) {
-          auto& attention = encdec->GetAttention();
-          alignments.emplace_back(new SoftAlignment(attention.begin(hypIndex),
-                                                    attention.end(hypIndex)));
-        } else {
-          UTIL_THROW2("Return Alignment is allowed only with Nematus scorer.");
-        }
-      }
-
-      hyp.reset(new Hypothesis(prevHyps[hypIndex], wordIndex, hypIndex, cost, alignments));
-    } else {
-      hyp.reset(new Hypothesis(prevHyps[hypIndex], wordIndex, hypIndex, cost));
-    }
-
-    if (doBreakdown) {
-      hyp->GetCostBreakdown().resize(scorers.size());
-      float sum = 0;
-      for(size_t j = 0; j < scorers.size(); ++j) {
-        if (j == 0) {
-          hyp->GetCostBreakdown()[0] = breakDowns[0][i];
-        } else {
-          float cost = 0;
-          if (j < scorers.size()) {
-            if (prevHyps[hypIndex]->GetCostBreakdown().size() < scorers.size())
-              const_cast<HypothesisPtr&>(prevHyps[hypIndex])->GetCostBreakdown().resize(scorers.size(), 0.0);
-            cost = breakDowns[j][i] + const_cast<HypothesisPtr&>(prevHyps[hypIndex])->GetCostBreakdown()[j];
-          }
-          sum += weights[scorers[j]->GetName()] * cost;
-          hyp->GetCostBreakdown()[j] = cost;
-        }
-      }
-      hyp->GetCostBreakdown()[0] -= sum;
-      hyp->GetCostBreakdown()[0] /= weights[scorers[0]->GetName()];
-    }
-    bestHyps.push_back(hyp);
-  }
-}
-
-
 }
 }
 
diff --git a/src/cpu/mblas/matrix.h b/src/cpu/mblas/matrix.h
index 1eb74323..b42db9bc 100644
--- a/src/cpu/mblas/matrix.h
+++ b/src/cpu/mblas/matrix.h
@@ -129,17 +129,11 @@ class ArrayMatrix : public BlazeMatrix<float, blaze::rowMajor>
       : Parent(rows, columns, val)
     {}
 
-      template <class MT>
-      ArrayMatrix(const MT& rhs)
-      :Parent(rhs)
+    template <class MT>
+    ArrayMatrix(const MT& rhs)
+      : Parent(rhs)
     {}
 
-    virtual void BestHyps(Beam& bestHyps, const Beam& prevHyps,
-        const size_t beamSize,
-        const std::vector<ScorerPtr> &scorers,
-        const Words &filterIndices,
-        bool returnAlignment) const;
-
 };
 
 ////////////////////////////////////////////////////////////////////////
diff --git a/src/gpu/decoder/ape_penalty.cu b/src/gpu/decoder/ape_penalty.cu
index 639a8e63..3cdd052d 100644
--- a/src/gpu/decoder/ape_penalty.cu
+++ b/src/gpu/decoder/ape_penalty.cu
@@ -2,6 +2,7 @@
 #include "common/god.h"
 #include "common/vocab.h"
 #include "gpu/types-gpu.h"
+#include "gpu/decoder/best_hyps.h"
 
 namespace GPU {
 
@@ -90,5 +91,9 @@ ScorerPtr ApePenaltyLoader::NewScorer(size_t taskId) {
                                   srcTrgMap_, penalties_));
 }
 
+BestHypsType ApePenaltyLoader::GetBestHyps() {
+  return BestHypsType(GPU::BestHyps{});  // each call wraps a newly constructed GPU::BestHyps functor
+}
+
 }
 
diff --git a/src/gpu/decoder/ape_penalty.h b/src/gpu/decoder/ape_penalty.h
index 662da558..992d987d 100644
--- a/src/gpu/decoder/ape_penalty.h
+++ b/src/gpu/decoder/ape_penalty.h
@@ -5,9 +5,11 @@
 #include "common/types.h"
 #include "common/file_stream.h"
 #include "common/scorer.h"
-#include "gpu/mblas/matrix.h"
+#include "common/base_best_hyps.h"
 #include "common/loader.h"
 
+#include "gpu/mblas/matrix.h"
+
 namespace GPU {
 
 typedef std::vector<Word> SrcTrgMap;
@@ -67,6 +69,7 @@ class ApePenaltyLoader : public Loader {
     virtual void Load();
 
     virtual ScorerPtr NewScorer(size_t taskId);
+    virtual BestHypsType GetBestHyps();
 
   private:
     SrcTrgMap srcTrgMap_;
diff --git a/src/gpu/decoder/best_hyps.h b/src/gpu/decoder/best_hyps.h
new file mode 100644
index 00000000..d7529f0d
--- /dev/null
+++ b/src/gpu/decoder/best_hyps.h
@@ -0,0 +1,168 @@
+#pragma once
+
+#include "common/scorer.h"
+#include "gpu/mblas/matrix.h"
+
+#include <thrust/device_vector.h>
+#include <thrust/functional.h>
+#include <thrust/execution_policy.h>
+
+namespace GPU {
+
+struct ProbCompare {  // comparator over indices into a float array
+  ProbCompare(const float* data) : data_(data) {}
+
+  __host__ __device__
+  bool operator()(const unsigned a, const unsigned b) {
+    return data_[a] > data_[b];  // true when the value at index a is greater than the value at index b
+  }
+
+  const float* data_;  // array the indices refer to; only read, never freed by this struct
+};
+
+class BestHyps {  // GPU beam-step functor; the device buffers declared below persist between calls
+  public:
+    void operator()(Beam& bestHyps,                  // out: new hypotheses are push_back'ed here
+          const Beam& prevHyps,                      // hypotheses of the previous step, indexed by matrix row
+          const size_t beamSize,                     // number of hypotheses to produce
+          const std::vector<ScorerPtr>& scorers,     // scorers whose GetProbs() matrices are combined
+          const Words& filterIndices,                // column index -> word id, used when "softmax-filter" is set
+          bool returnAlignment) {                    // when true, attach soft alignments to every new hypothesis
+        using namespace mblas;
+
+        auto& weights = God::GetScorerWeights();
+
+        mblas::Matrix& Probs = static_cast<mblas::Matrix&>(scorers[0]->GetProbs());
+        // resize (not reserve): the copy below writes elements, so they must exist in [begin, end).
+        Costs.resize(Probs.Rows());
+        HostVector<float> vCosts;
+        for (auto& h : prevHyps) {
+          vCosts.push_back(h->GetCost());
+        }
+        thrust::copy(thrust::cuda::par.on(Matrix::GetStream()),
+                  vCosts.begin(), vCosts.end(), Costs.begin());
+        // Probs becomes (in place) the weighted sum of all scorer outputs plus the previous costs.
+        BroadcastVecColumn(weights[scorers[0]->GetName()] * _1 + _2,
+                          Probs, Costs);
+        for (size_t i = 1; i < scorers.size(); ++i) {
+          mblas::Matrix &currProbs = static_cast<mblas::Matrix&>(scorers[i]->GetProbs());
+
+          Element(_1 + weights[scorers[i]->GetName()] * _2,
+                  Probs, currProbs);
+        }
+
+        keys.resize(Probs.size());
+        thrust::host_vector<unsigned> bestKeys(beamSize);
+        thrust::host_vector<float> bestCosts(beamSize);
+
+        // @TODO: make this more efficient
+        if (!God::Get<bool>("allow-unk")) {
+          for (size_t i = 0; i < Probs.Rows(); i++)
+            Probs.Set(i, UNK, std::numeric_limits<float>::lowest());
+        }
+
+        // @TODO: Here we need to have a partial sort
+        if (beamSize < 10) {
+          for (size_t i = 0; i < beamSize; ++i) {
+            DeviceVector<float>::iterator iter =
+            thrust::max_element(thrust::cuda::par.on(Matrix::GetStream()),
+                                Probs.begin(), Probs.end());
+            bestKeys[i] = iter - Probs.begin();
+            bestCosts[i] = *iter;
+            *iter = std::numeric_limits<float>::lowest();  // mask the winner so the next pass finds the runner-up
+          }
+          algo::copy(thrust::cuda::par.on(Matrix::GetStream()),
+                    bestKeys.begin(), bestKeys.end(), keys.begin());
+        }
+        else {
+            // these two functions do not have equivalents
+            // in the standard library or boost, keeping thrust
+            // namespace for now
+            thrust::sequence(thrust::cuda::par.on(Matrix::GetStream()),
+                            keys.begin(), keys.end());
+            thrust::sort_by_key(thrust::cuda::par.on(Matrix::GetStream()), Probs.begin(), Probs.end(),
+                                keys.begin(), algo::greater<float>());
+
+            thrust::copy_n(thrust::cuda::par.on(Matrix::GetStream()),
+                        keys.begin(), beamSize, bestKeys.begin());
+            thrust::copy_n(thrust::cuda::par.on(Matrix::GetStream()),
+                        Probs.begin(), beamSize, bestCosts.begin());
+        }
+
+        // breakDowns[0]: combined costs; breakDowns[j], j >= 1: scores of scorers[j] at the kept keys.
+        std::vector<HostVector<float>> breakDowns;
+        bool doBreakdown = God::Get<bool>("n-best");
+        if (doBreakdown) {
+            breakDowns.push_back(bestCosts);
+            for (size_t i = 1; i < scorers.size(); ++i) {
+                HostVector<float> modelCosts(beamSize);
+                mblas::Matrix &currProbs = static_cast<mblas::Matrix&>(scorers[i]->GetProbs());
+
+                auto it = iteralgo::make_permutation_iterator(currProbs.begin(), keys.begin());
+                algo::copy(thrust::cuda::par.on(Matrix::GetStream()),
+                            it, it + beamSize, modelCosts.begin());
+                breakDowns.push_back(modelCosts);
+            }
+        }
+
+        bool filter = God::Get<std::vector<std::string>>("softmax-filter").size();
+
+        for (size_t i = 0; i < beamSize; i++) {
+          size_t wordIndex = bestKeys[i] % Probs.Cols();
+          if (filter) {
+            wordIndex = filterIndices[wordIndex];
+          }
+
+          size_t hypIndex  = bestKeys[i] / Probs.Cols();
+          float cost = bestCosts[i];
+
+          HypothesisPtr hyp;
+          if (returnAlignment) {
+            std::vector<SoftAlignmentPtr> alignments;
+            for (auto& scorer : scorers) {
+              if (GPU::EncoderDecoder* encdec = dynamic_cast<GPU::EncoderDecoder*>(scorer.get())) {
+                auto& attention = encdec->GetAttention();
+                size_t attLength = attention.Cols();
+
+                alignments.emplace_back(new SoftAlignment(attention.begin() + hypIndex * attLength,
+                                                          attention.begin() + (hypIndex + 1) * attLength));
+              } else {
+                UTIL_THROW2("Return Alignment is allowed only with Nematus scorer.");
+              }
+            }
+            hyp.reset(new Hypothesis(prevHyps[hypIndex], wordIndex, hypIndex, cost, alignments));
+          } else {
+            hyp.reset(new Hypothesis(prevHyps[hypIndex], wordIndex, hypIndex, cost));
+          }
+
+          if (doBreakdown) {
+            hyp->GetCostBreakdown().resize(scorers.size());
+            float sum = 0;
+            for (size_t j = 0; j < scorers.size(); ++j) {
+              if (j == 0)
+                hyp->GetCostBreakdown()[0] = breakDowns[0][i];
+              else {
+                float cost = 0;
+                if (j < scorers.size()) {
+                  if (prevHyps[hypIndex]->GetCostBreakdown().size() < scorers.size())
+                    const_cast<HypothesisPtr&>(prevHyps[hypIndex])->GetCostBreakdown().resize(scorers.size(), 0.0);
+                  cost = breakDowns[j][i] + const_cast<HypothesisPtr&>(prevHyps[hypIndex])->GetCostBreakdown()[j];
+                }
+                sum += weights[scorers[j]->GetName()] * cost;
+                hyp->GetCostBreakdown()[j] = cost;
+              }
+            }
+            hyp->GetCostBreakdown()[0] -= sum;  // slot 0: combined cost minus the other scorers' weighted shares
+            hyp->GetCostBreakdown()[0] /= weights[scorers[0]->GetName()];
+          }
+          bestHyps.push_back(hyp);
+        }
+    }
+
+  private:
+    mutable thrust::device_vector<unsigned> keys;
+    mutable thrust::device_vector<float> Costs;
+
+};
+
+}
diff --git a/src/gpu/decoder/encoder_decoder.cu b/src/gpu/decoder/encoder_decoder.cu
index 810f46cb..2330cc65 100644
--- a/src/gpu/decoder/encoder_decoder.cu
+++ b/src/gpu/decoder/encoder_decoder.cu
@@ -6,6 +6,7 @@
 #include "gpu/mblas/matrix.h"
 #include "gpu/dl4mt/dl4mt.h"
 #include "gpu/decoder/encoder_decoder_state.h"
+#include "gpu/decoder/best_hyps.h"
 
 using namespace std;
 
@@ -141,5 +142,9 @@ ScorerPtr EncoderDecoderLoader::NewScorer(size_t taskId) {
                                       tab, *weights_[i]));
 }
 
+BestHypsType EncoderDecoderLoader::GetBestHyps() {
+  return BestHypsType(GPU::BestHyps{});  // each call wraps a newly constructed GPU::BestHyps functor
+}
+
 }
 
diff --git a/src/gpu/decoder/encoder_decoder.h b/src/gpu/decoder/encoder_decoder.h
index 9f555500..dad42b09 100644
--- a/src/gpu/decoder/encoder_decoder.h
+++ b/src/gpu/decoder/encoder_decoder.h
@@ -5,6 +5,7 @@
 
 #include "common/scorer.h"
 #include "common/loader.h"
+#include "common/base_best_hyps.h"
 
 #include "common/threadpool.h"
 #include <thrust/device_vector.h>
@@ -73,6 +74,7 @@ class EncoderDecoderLoader : public Loader {
     virtual void Load();
 
     virtual ScorerPtr NewScorer(size_t taskId);
+    virtual BestHypsType GetBestHyps();
 
   private:
     std::vector<std::unique_ptr<Weights>> weights_;
author	Tomasz Dwojak <t.dwojak@amu.edu.pl>	2016-10-21 20:25:44 +0300
committer	Tomasz Dwojak <t.dwojak@amu.edu.pl>	2016-11-01 21:07:04 +0300
commit	5059b12aba0fcf5b520559891f4239604b7807a8 (patch)
tree	c25db197bb7cfb4aa6131c9a2171f68cce1b9aac
parent	e877b5760f1d9f04c6e8fb7e7b6b1a09fd9849c3 (diff)