8 files changed, 151 insertions, 143 deletions
diff --git a/src/graph/expression_operators.cpp b/src/graph/expression_operators.cpp
index 8a79cbe0..5c37beef 100755
--- a/src/graph/expression_operators.cpp
+++ b/src/graph/expression_operators.cpp
@@ -536,7 +536,7 @@ Expr swapAxes(Expr x, int axis1, int axis2)
     return x;
   // TODO: This is code dup from transpose(x). Implement transpose(x) as swapAxes(x, 0, 1)
   std::vector<int> axes(x->shape().size());
-  for (int i = 0; i < axes.size(); ++i)
+  for (int i = 0; i < axes.size(); ++i) // @TODO: use std::iota()
     axes[i] = i;
   std::swap(axes[axis1], axes[axis2]);
   return transpose(x, axes);
diff --git a/src/microsoft/quicksand.cpp b/src/microsoft/quicksand.cpp
index 2c9fc2a3..98b9fdd5 100755
--- a/src/microsoft/quicksand.cpp
+++ b/src/microsoft/quicksand.cpp
@@ -147,7 +147,7 @@ public:
           else
             alignmentThreshold = std::max(std::stof(alignment), 0.f);
           auto hyp = std::get<1>(result);
-          data::WordAlignment align = data::ConvertSoftAlignToHardAlign(hyp->TracebackAlignment(), alignmentThreshold);
+          data::WordAlignment align = data::ConvertSoftAlignToHardAlign(hyp->tracebackAlignment(), alignmentThreshold);
           // convert to QuickSAND format
           alignmentSets.resize(words.size());
           for (const auto& p : align) // @TODO: Does the feature_model param max_alignment_links apply here?
diff --git a/src/translator/beam_search.h b/src/translator/beam_search.h
index 5272c217..f825b853 100755
--- a/src/translator/beam_search.h
+++ b/src/translator/beam_search.h
@@ -31,74 +31,83 @@ public:
         trgEosId_(trgEosId),
         trgUnkId_(trgUnkId) {}
 
-  Beams toHyps(const std::vector<unsigned int> keys,
-               const std::vector<float> pathScores,
-               size_t vocabSize,
+  // combine new expandedPathScores and previous beams into new set of beams
+  Beams toHyps(const std::vector<unsigned int> nBestKeys, // [dimBatch, beamSize] flattened -> ((batchIdx, beamHypIdx) flattened, word idx) flattened
+               const std::vector<float> nBestPathScores,  // [dimBatch, beamSize] flattened
+               const size_t vocabSize,
                const Beams& beams,
-               const std::vector<Ptr<ScorerState>>& states,
-               size_t beamSize,
-               bool first,
-               Ptr<data::CorpusBatch> batch) const {
-    Beams newBeams(beams.size());
+               const std::vector<Ptr<ScorerState /*const*/>>& states,
+               const size_t beamSize,
+               const bool first,
+               Ptr<data::CorpusBatch /*const*/> batch) const {
+    const auto dimBatch = beams.size();
+    Beams newBeams(dimBatch);
 
     std::vector<float> align;
     if(options_->hasAndNotEmpty("alignment"))
       // Use alignments from the first scorer, even if ensemble
       align = scorers_[0]->getAlignment();
 
-    for(size_t i = 0; i < keys.size(); ++i) { // keys: [beamSize, ?] (flattened)
+    for(size_t i = 0; i < nBestKeys.size(); ++i) { // [dimBatch, beamSize] flattened
       // Keys contains indices to vocab items in the entire beam.
       // Values can be between 0 and beamSize * vocabSize.
-      auto beamIdx = i / beamSize;
+      const auto batchIdx = i / beamSize; // and i % beamSize is the beam hyp index
+      const auto& beam = beams[batchIdx];
+      auto& newBeam = newBeams[batchIdx];
+
+      if(newBeam.size() < beam.size()) {
+        const float pathScore = nBestPathScores[i];
+        const auto  key       = nBestKeys[i]; // key = pathScore's location, as ((batchIdx, beamHypIdx) flattened, word idx) flattened
+
+        // decompose key into individual indices
+        Word       wordIdx = (Word)(key % vocabSize);
+        const auto hypIdx  =       (key / vocabSize);
+#if 1
+        // further decompose hypIdx, taking into account that the very first entry had beam size 1
+        // and compose a new hypIdx that assumes actual beamSize
+        const auto keyBatchIdx   = hypIdx / (first ? 1 : beamSize);
+        const auto keyBeamHypIdx = hypIdx % (first ? 1 : beamSize);
+        const auto hypIdxTrans = keyBeamHypIdx * dimBatch + keyBatchIdx;
+        ABORT_IF(keyBeamHypIdx >= (int)beam.size(), "Beam hyp index exceeds beam size??"); // not possible, as beamSize = max(beams[.].size())
+#else
+        const auto keyBatchIdx = hypIdx / beamSize; // @REVIEW: is this actually keyBatchIdx?
+        size_t keyBeamHypIdx = hypIdx % beamSize;
+
+        auto hypIdxTrans = keyBatchIdx + keyBeamHypIdx * dimBatch;
+        if(first)
+          hypIdxTrans = hypIdx; // == keyBeamHypIdx + keyBatchIdx * beamSize? or was beamSize=1, and keyBeamHypIdx = 0?
+
+        ABORT_IF(keyBeamHypIdx >= (int)beam.size(), "Beam hyp index exceeds beam size??");
+        //if(keyBeamHypIdx >= (int)beam.size())  // @TODO: What is this condition? Cf. keyBeamHypIdx = hypIdx % beamSize; beamSize = max(beams[.].size())
+        //  keyBeamHypIdx = keyBeamHypIdx % beam.size();
 
-      if(newBeams[beamIdx].size() < beams[beamIdx].size()) {
-        Word wordIdx = (Word)(keys[i] % vocabSize);
+        if(first)
+          keyBeamHypIdx = 0;
+#endif
         // Retrieve short list for final softmax (based on words aligned
         // to source sentences). If short list has been set, map the indices
         // in the sub-selected vocabulary matrix back to their original positions.
         auto shortlist = scorers_[0]->getShortlist();
         if(shortlist)
           wordIdx = shortlist->reverseMap(wordIdx); // @TODO: should reverseMap accept a size_t or a Word?
+        // now wordIdx is a regular Word again
 
-        const auto& beam = beams[beamIdx];
-        auto& newBeam = newBeams[beamIdx];
-
-        const float pathScore = pathScores[i];
-
-        // keys[i] = offset into row-major cube of dims [whatIsThis, beamSize, vocabSize]
-        // deconstruct into individual indices
-        const auto hypIdx = (IndexType)(keys[i] / vocabSize);
-        const auto whatIsThis = (hypIdx / beamSize); // @TODO: is this batchIdx?
-        size_t beamHypIdx = hypIdx % beamSize;
-
-        auto hypIdxTrans = IndexType(whatIsThis + beamHypIdx * beams.size());
-        if(first)
-          hypIdxTrans = hypIdx;
-
-        if(beamHypIdx >= (int)beam.size())  // @TODO: What is this condition? Cf. beamHypIdx = hypIdx % beamSize
-          beamHypIdx = beamHypIdx % beam.size();
-
-        if(first)
-          beamHypIdx = 0;
-
-        auto hyp = New<Hypothesis>(beam[beamHypIdx], wordIdx, hypIdxTrans, pathScore);
+        auto hyp = New<Hypothesis>(beam[keyBeamHypIdx], wordIdx, (IndexType)hypIdxTrans, pathScore);
 
         // Set score breakdown for n-best lists
         if(options_->get<bool>("n-best")) {
           std::vector<float> breakDown(states.size(), 0);
-          beam[beamHypIdx]->GetScoreBreakdown().resize(states.size(), 0);
+          beam[keyBeamHypIdx]->getScoreBreakdown().resize(states.size(), 0); // @TODO: Why? Can we just guard the read-out below, then make it const? Or getScoreBreakdown(j)?
           for(size_t j = 0; j < states.size(); ++j) {
-            size_t key = wordIdx + hypIdxTrans * vocabSize;
-            breakDown[j] = states[j]->breakDown(key)
-                           + beam[beamHypIdx]->GetScoreBreakdown()[j];
+            size_t key1 = hypIdxTrans * vocabSize + wordIdx;
+            breakDown[j] = states[j]->breakDown(key1) + beam[keyBeamHypIdx]->getScoreBreakdown()[j];
           }
-          hyp->GetScoreBreakdown() = breakDown;
+          hyp->setScoreBreakdown(breakDown);
         }
 
         // Set alignments
         if(!align.empty()) {
-          hyp->SetAlignment(
-              getAlignmentsForHypothesis(align, batch, (int)beamHypIdx, (int)beamIdx));
+          hyp->setAlignment(getAlignmentsForHypothesis(align, batch, (int)keyBeamHypIdx, (int)batchIdx));
         }
 
         newBeam.push_back(hyp);
@@ -161,7 +170,13 @@ public:
   //**********************************************************************
   // main decoding function
   Histories search(Ptr<ExpressionGraph> graph, Ptr<data::CorpusBatch> batch) {
-    int dimBatch = (int)batch->size();
+    const int dimBatch = (int)batch->size();
+
+    auto getNBestList = createGetNBestListFn(beamSize_, dimBatch, graph->getDeviceId());
+
+    for(auto scorer : scorers_) {
+      scorer->clear(graph);
+    }
 
     Histories histories(dimBatch);
     for(int i = 0; i < dimBatch; ++i) {
@@ -171,27 +186,20 @@ public:
                                   options_->get<float>("word-penalty"));
     }
 
-    size_t localBeamSize = beamSize_; // max over beam sizes of active sentence hypotheses
-
-    auto getNBestList = createGetNBestListFn(localBeamSize, dimBatch, graph->getDeviceId());
-
-    Beams beams(dimBatch);        // array [dimBatch] of array [localBeamSize] of Hypothesis
-    for(auto& beam : beams)
-      beam.resize(localBeamSize, New<Hypothesis>());
-
-    for(int i = 0; i < dimBatch; ++i)
-      histories[i]->add(beams[i], trgEosId_);
-
+    // start states
     std::vector<Ptr<ScorerState>> states;
-
-    for(auto scorer : scorers_) {
-      scorer->clear(graph);
-    }
-
     for(auto scorer : scorers_) {
       states.push_back(scorer->startState(graph, batch));
     }
 
+    Beams beams(dimBatch, Beam(beamSize_, New<Hypothesis>())); // array [dimBatch] of array [localBeamSize] of Hypothesis
+    //Beams beams(dimBatch); // array [dimBatch] of array [localBeamSize] of Hypothesis
+    //for(auto& beam : beams)
+    //  beam.resize(beamSize_, New<Hypothesis>());
+
+    for(int i = 0; i < dimBatch; ++i)
+      histories[i]->add(beams[i], trgEosId_);
+
     // the decoder maintains the following state:
     //  - histories : array [dimBatch] of History
     //    with History : vector [t] of array [localBeamSize] of Hypothesis
@@ -206,104 +214,116 @@ public:
     //  - states[.] : ScorerState
     //     - NN state
     //     - one per scorer, e.g. 2 for ensemble of 2
+    //     - gets replaced at the end of each output time step
 
     // main loop over output time steps
-    for(bool first = true; ; first = false) {
+    for (size_t t = 0; ; t++) {
+      ABORT_IF(dimBatch != beams.size(), "Lost a batch entry??");
+      // determine beam size for next output time step, as max over still-active sentences
+      // E.g. if all batch entries are down from beam 5 to no more than 4 surviving hyps, then
+      // switch to beam of 4 for all. If all are done, then beam ends up being 0, and we are done.
+      size_t localBeamSize = 0; // @TODO: is there some std::algorithm for this?
+      for(auto& beam : beams)
+        if(beam.size() > localBeamSize)
+          localBeamSize = beam.size();
+
+      // done if all batch entries have reached EOS on all beam entries
+      if (localBeamSize == 0)
+        break;
+
       //**********************************************************************
       // create constant containing previous path scores for current beam
       // Also create mapping of hyp indices, which are not 1:1 if sentences complete.
-      std::vector<IndexType> hypIndices; // [localBeamsize, 1, dimBatch, 1] (flattened) index of beam index that each of the new top N originated from
-      std::vector<Word> prevWords;      // [localBeamsize, 1, dimBatch, 1] (flattened) predecessor word
-      Expr prevPathScores;               // [localBeamSize, 1, dimBatch, 1], where the last axis broadcasts into vocab size when adding pathScores
-      if(first) {
-        // no scores yet
+      std::vector<IndexType> hypIndices; // [localBeamsize, 1, dimBatch, 1] (flattened) index of hyp that the new top N originated from
+      std::vector<Word> prevWords;       // [localBeamsize, 1, dimBatch, 1] (flattened) predecessor word
+      Expr prevPathScores;               // [localBeamSize, 1, dimBatch, 1], where the last axis broadcasts into vocab size when adding expandedPathScores
+      if(t == 0) { // no scores yet
         prevPathScores = graph->constant({1, 1, 1, 1}, inits::from_value(0));
       } else {
-        dimBatch = (int)batch->size();
-        ABORT_IF(dimBatch != beams.size(), "Dimensions mismatch??");
-
-        std::vector<float> beamScores;
+        std::vector<float> prevScores;
         for(size_t beamIndex = 0; beamIndex < localBeamSize; ++beamIndex) {
           for(int batchIndex = 0; batchIndex < dimBatch; ++batchIndex) { // loop over batch entries (active sentences)
             auto& beam = beams[batchIndex];
             if(beamIndex < beam.size()) {
               auto hyp = beam[beamIndex];
-              hypIndices.push_back((IndexType)hyp->getPrevStateIndex()); // backpointer
+              hypIndices.push_back((IndexType)hyp->getPrevStateIndex()); // index where to find prev hyp (beamHypIdx, batchIdx), =beamHypIdx * dimBatch + batchIdx
               prevWords .push_back(hyp->getWord());
-              beamScores.push_back(hyp->getPathScore());
-            } else {  // dummy hypothesis
+              prevScores.push_back(hyp->getPathScore());
+            } else {  // pad to localBeamSize (dummy hypothesis)
               hypIndices.push_back(0);
               prevWords .push_back(Word{});  // (unused)
-              beamScores.push_back(-9999);
+              prevScores.push_back(-9999);
             }
           }
         }
-
         prevPathScores = graph->constant({(int)localBeamSize, 1, dimBatch, 1},
-                                    inits::from_vector(beamScores));
+                                    inits::from_vector(prevScores));
       }
 
       //**********************************************************************
-      // prepare scores for beam search
-      auto pathScores = prevPathScores;
-
+      // compute expanded path scores with word prediction probs from all scorers
+      auto expandedPathScores = prevPathScores; // will become [localBeamSize, 1, dimBatch, dimVocab]
       for(size_t i = 0; i < scorers_.size(); ++i) {
         // compute output probabilities for current output time step
-        //  - uses hypIndices[index in beam, 1, batch index, 1] and embIndices[index in beam, 1, batch index, 1] to reorder hypotheses
+        //  - uses hypIndices[index in beam, 1, batch index, 1] to reorder hypotheses
+        //  - adds prevWords [index in beam, 1, batch index, 1] to the decoder model's target history
+        //  - performs one step of the decoder model
         //  - returns new NN state for use in next output time step
         //  - returns vector of prediction probabilities over output vocab via newState
         auto newState = scorers_[i]->step(
             graph, states[i], hypIndices, prevWords, dimBatch, (int)localBeamSize);
 
         // expand all hypotheses, [localBeamSize, 1, dimBatch, 1] -> [localBeamSize, 1, dimBatch, dimVocab]
-        pathScores = pathScores + scorers_[i]->getWeight() * newState->getLogProbs();
+        expandedPathScores = expandedPathScores + scorers_[i]->getWeight() * newState->getLogProbs();
 
         // update state in-place for next output time step
         states[i] = newState;
       }
 
       // make beams continuous
-      if(dimBatch > 1 && localBeamSize > 1)
-        pathScores = transpose(pathScores, {2, 1, 0, 3}); // -> [dimBatch, 1, localBeamSize, dimVocab]
+      expandedPathScores = swapAxes(expandedPathScores, 0, 2); // -> [dimBatch, 1, localBeamSize, dimVocab]
+      //if(dimBatch > 1 && localBeamSize > 1)
+      //  expandedPathScores = transpose(expandedPathScores, {2, 1, 0, 3}); // -> [dimBatch, 1, localBeamSize, dimVocab]
 
-      if(first)
+      // perform NN computation
+      if(t == 0)
         graph->forward();
       else
         graph->forwardNext();
 
       //**********************************************************************
       // suppress specific symbols if not at right positions
-      if(trgUnkId_ != -1 && options_->has("allow-unk")
-         && !options_->get<bool>("allow-unk"))
-        suppressWord(pathScores, trgUnkId_);
+      if(trgUnkId_ != -1 && options_->has("allow-unk") && !options_->get<bool>("allow-unk"))
+        suppressWord(expandedPathScores, trgUnkId_);
       for(auto state : states)
-        state->blacklist(pathScores, batch);
+        state->blacklist(expandedPathScores, batch);
 
       //**********************************************************************
-      // perform beam search and pruning
+      // perform beam search
 
       // find N best amongst the (localBeamSize * dimVocab) hypotheses
-      const std::vector<size_t> beamSizes(dimBatch, localBeamSize);
-      std::vector<unsigned int> outKeys;
-      std::vector<float> outPathScores;
-      getNBestList(beamSizes, pathScores->val(), outPathScores, outKeys, first);
-      // outPathScores and outKeys contain pathScores and their original indices in N-best order
-
-      // convert N-best sets to updated search space
-      int dimTrgVoc = pathScores->shape()[-1];
-      beams = toHyps(outKeys,
-                     outPathScores,
-                     dimTrgVoc,
+      std::vector<unsigned int> nBestKeys; // [dimBatch, localBeamSize] flattened -> ((batchIdx, beamHypIdx) flattened, word idx) flattened
+      std::vector<float> nBestPathScores;  // [dimBatch, localBeamSize] flattened
+      getNBestList(/*beamSizes=*/std::vector<size_t>(dimBatch, localBeamSize), // output layout of (nBestPathScores, nBestKeys)  --@REVIEW: correct?
+                   /*in*/ expandedPathScores->val(),                           // [dimBatch, 1, localBeamSize, dimVocab or dimShortlist]
+                   /*out*/ nBestPathScores, /*out*/ nBestKeys,
+                   /*first=*/t == 0); // @TODO: Why is this passed? To know that the beam size is 1 for first step, for flattened hyp index?
+      // Now, nBestPathScores contain N-best expandedPathScores, and nBestKeys for each their original location (batchIdx, beamHypIdx, word).
+
+      // combine N-best sets with existing search space (beams) to updated search space
+      beams = toHyps(nBestKeys, nBestPathScores,
+                     /*dimTrgVoc=*/expandedPathScores->shape()[-1],
                      beams,
-                     states,
-                     localBeamSize,
-                     first,
+                     states,           // used for keeping track of per-ensemble-member path score
+                     localBeamSize,    // used in the encoding of the (batchIdx, beamHypIdx, word) tuples
+                     /*first=*/t == 0, // used to indicate originating beamSize of 1
                      batch);
 
       // remove all hyps that end in EOS
-      auto purgedBeams = purgeBeams(beams); // @TODO: rename; this is not pruning
+      // The position of a hyp in the beam may change.
+      const auto purgedBeams = purgeBeams(beams);
 
-      // add updated search space to search grid for traceback
+      // add updated search space (beams) to search grid (histories) for traceback
       bool maxLengthReached = false;
       for(int i = 0; i < dimBatch; ++i) {
         // if this batch entry has surviving hyps then add them to the traceback grid
@@ -318,20 +338,7 @@ public:
 
       // this is the search space for the next output time step
       beams = purgedBeams;
-
-      // determine beam size for next output time step, as max over still-active sentences
-      // E.g. if all batch entries are down from beam 5 to no more than 4 surviving hyps, then
-      // switch to beam of 4 for all. If all are done, then beam ends up being 0, and we are done.
-      if(!first) {
-        size_t maxBeam = 0;
-        for(auto& beam : beams)
-          if(beam.size() > maxBeam)
-            maxBeam = beam.size();
-        localBeamSize = maxBeam;
-      }
-      if (localBeamSize == 0) // done if all batch entries have reached EOS on all beam entries
-        break;
-    } // end of main loop over output tokens
+    } // end of main loop over output time steps
 
     return histories; // [dimBatch][t][N best hyps]
   }
diff --git a/src/translator/history.h b/src/translator/history.h
index dcf5e585..ff37fb6e 100755
--- a/src/translator/history.h
+++ b/src/translator/history.h
@@ -25,7 +25,7 @@ public:
   History(size_t lineNo, float alpha = 1.f, float wp_ = 0.f);
 
   void add(const Beam& beam, Word trgEosId, bool last = false) {
-    if(beam.back()->GetPrevHyp() != nullptr) {
+    if(beam.back()->getPrevHyp() != nullptr) {
       for(size_t j = 0; j < beam.size(); ++j)
         if(beam[j]->getWord() == trgEosId || last) {
           float pathScore =
@@ -52,7 +52,7 @@ public:
       // std::cerr << "h: " << start << " " << j << " " << c << std::endl;
 
       // trace back best path
-      Words targetWords = bestHyp->TracebackWords();
+      Words targetWords = bestHyp->tracebackWords();
 
       // note: bestHyp->getPathScore() is not normalized, while bestHypCoord.normalizedPathScore is
       nbest.emplace_back(targetWords, bestHyp, bestHypCoord.normalizedPathScore);
diff --git a/src/translator/hypothesis.h b/src/translator/hypothesis.h
index ddb47514..452b3349 100755
--- a/src/translator/hypothesis.h
+++ b/src/translator/hypothesis.h
@@ -17,11 +17,11 @@ public:
 
   Hypothesis(const Ptr<Hypothesis> prevHyp,
              Word word,
-             IndexType prevIndex,
+             IndexType prevIndex, // (beamHypIdx, batchIdx) flattened as beamHypIdx * dimBatch + batchIdx
              float pathScore)
       : prevHyp_(prevHyp), prevIndex_(prevIndex), word_(word), pathScore_(pathScore) {}
 
-  const Ptr<Hypothesis> GetPrevHyp() const { return prevHyp_; }
+  const Ptr<Hypothesis> getPrevHyp() const { return prevHyp_; }
 
   Word getWord() const { return word_; }
 
@@ -29,16 +29,17 @@ public:
 
   float getPathScore() const { return pathScore_; }
 
-  std::vector<float>& GetScoreBreakdown() { return scoreBreakdown_; }
-  std::vector<float>& GetAlignment() { return alignment_; }
+  std::vector<float>& getScoreBreakdown() { return scoreBreakdown_; }
+  void setScoreBreakdown(const std::vector<float>& scoreBreaddown) { scoreBreakdown_ = scoreBreaddown; }
 
-  void SetAlignment(const std::vector<float>& align) { alignment_ = align; };
+  const std::vector<float>& getAlignment() { return alignment_; }
+  void setAlignment(const std::vector<float>& align) { alignment_ = align; };
 
   // helpers to trace back paths referenced from this hypothesis
-  Words TracebackWords()
+  Words tracebackWords()
   {
       Words targetWords;
-      for (auto hyp = this; hyp->GetPrevHyp(); hyp = hyp->GetPrevHyp().get()) {
+      for (auto hyp = this; hyp->getPrevHyp(); hyp = hyp->getPrevHyp().get()) {
           targetWords.push_back(hyp->getWord());
           // std::cerr << hyp->getWord() << " " << hyp << std::endl;
       }
@@ -48,11 +49,11 @@ public:
 
   // get soft alignments for each target word starting from the hyp one
   typedef data::SoftAlignment SoftAlignment;
-  SoftAlignment TracebackAlignment()
+  SoftAlignment tracebackAlignment()
   {
       SoftAlignment align;
-      for (auto hyp = this; hyp->GetPrevHyp(); hyp = hyp->GetPrevHyp().get()) {
-          align.push_back(hyp->GetAlignment());
+      for (auto hyp = this; hyp->getPrevHyp(); hyp = hyp->getPrevHyp().get()) {
+          align.push_back(hyp->getAlignment());
       }
       std::reverse(align.begin(), align.end());
       return align;
@@ -64,12 +65,12 @@ private:
   const Word word_;
   const float pathScore_;
 
-  std::vector<float> scoreBreakdown_;
+  std::vector<float> scoreBreakdown_; // [num scorers]
   std::vector<float> alignment_;
 };
 
-typedef std::vector<Ptr<Hypothesis>> Beam;                // Beam = vector of hypotheses
-typedef std::vector<Beam> Beams;                          // Beams = vector of vector of hypotheses
+typedef std::vector<Ptr<Hypothesis>> Beam;                // Beam = vector [beamSize] of hypotheses
+typedef std::vector<Beam> Beams;                          // Beams = vector [batchDim] of vector [beamSize] of hypotheses
 typedef std::tuple<Words, Ptr<Hypothesis>, float> Result; // (word ids for hyp, hyp, normalized sentence score for hyp)
 typedef std::vector<Result> NBestList;                    // sorted vector of (word ids, hyp, sent score) tuples
 }  // namespace marian
diff --git a/src/translator/output_printer.cpp b/src/translator/output_printer.cpp
index 000af789..05167898 100644..100755
--- a/src/translator/output_printer.cpp
+++ b/src/translator/output_printer.cpp
@@ -6,9 +6,9 @@ std::string OutputPrinter::getAlignment(const Ptr<Hypothesis>& hyp) {
   data::SoftAlignment align;
   auto last = hyp;
   // get soft alignments for each target word starting from the last one
-  while(last->GetPrevHyp().get() != nullptr) {
-    align.push_back(last->GetAlignment());
-    last = last->GetPrevHyp();
+  while(last->getPrevHyp().get() != nullptr) {
+    align.push_back(last->getAlignment());
+    last = last->getPrevHyp();
   }
 
   // reverse alignments
diff --git a/src/translator/output_printer.h b/src/translator/output_printer.h
index 38ed7b9c..83ef9c0c 100755
--- a/src/translator/output_printer.h
+++ b/src/translator/output_printer.h
@@ -41,11 +41,11 @@ public:
         bestn << " ||| " << getAlignment(hypo);
 
       bestn << " |||";
-      if(hypo->GetScoreBreakdown().empty()) {
+      if(hypo->getScoreBreakdown().empty()) {
         bestn << " F0=" << hypo->getPathScore();
       } else {
-        for(size_t j = 0; j < hypo->GetScoreBreakdown().size(); ++j) {
-          bestn << " F" << j << "= " << hypo->GetScoreBreakdown()[j];
+        for(size_t j = 0; j < hypo->getScoreBreakdown().size(); ++j) {
+          bestn << " F" << j << "= " << hypo->getScoreBreakdown()[j];
         }
       }
 
diff --git a/src/translator/scorers.h b/src/translator/scorers.h
index 16a066b0..36591fe8 100644..100755
--- a/src/translator/scorers.h
+++ b/src/translator/scorers.h
@@ -9,9 +9,9 @@ namespace marian {
 
 class ScorerState {
 public:
-  virtual Expr getLogProbs() = 0;
+  virtual Expr getLogProbs() const = 0;
 
-  virtual float breakDown(size_t i) { return getLogProbs()->val()->get(i); }
+  float breakDown(size_t i) const { return getLogProbs()->val()->get(i); }
 
   virtual void blacklist(Expr /*totalCosts*/, Ptr<data::CorpusBatch> /*batch*/){};
 };
@@ -57,7 +57,7 @@ public:
 
   virtual Ptr<DecoderState> getState() { return state_; }
 
-  virtual Expr getLogProbs() override { return state_->getLogProbs(); };
+  virtual Expr getLogProbs() const override { return state_->getLogProbs(); };
 
   virtual void blacklist(Expr totalCosts, Ptr<data::CorpusBatch> batch) override {
     state_->blacklist(totalCosts, batch);