6 files changed, 47 insertions, 10 deletions
diff --git a/lm/Jamfile b/lm/Jamfile
index 6ca37c99e..227b22014 100644
--- a/lm/Jamfile
+++ b/lm/Jamfile
@@ -17,7 +17,7 @@ wrappers = ;
 local with-nplm = [ option.get "with-nplm" ] ;
 if $(with-nplm) {
   lib neuralLM : : <search>$(with-nplm)/src ;
-  obj nplm.o : wrappers/nplm.cc : <include>.. <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen <cxxflags>-fopenmp ;
+  obj nplm.o : wrappers/nplm.cc : <include>.. <include>$(with-nplm)/src <cxxflags>-fopenmp ;
   alias nplm : nplm.o neuralLM ..//boost_thread : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>WITH_NPLM <library>..//boost_thread ;
   wrappers += nplm ;
 }
@@ -37,4 +37,4 @@ for local p in [ glob *_main.cc ] {
   exes += $(name) ;
 }
 
-alias programs : $(exes) filter//filter : <threading>multi:<source>builder//lmplz ;
+alias programs : $(exes) filter//filter builder//dump_counts : <threading>multi:<source>builder//lmplz ;
diff --git a/lm/builder/Jamfile b/lm/builder/Jamfile
index b596e086a..1e0e18b5f 100644
--- a/lm/builder/Jamfile
+++ b/lm/builder/Jamfile
@@ -4,6 +4,10 @@ fakelib builder : [ glob *.cc : *test.cc *main.cc ]
 
 exe lmplz : lmplz_main.cc builder /top//boost_program_options ;
 
+exe dump_counts : dump_counts_main.cc builder ;
+
+alias programs : lmplz dump_counts ;
+
 import testing ;
 unit-test corpus_count_test : corpus_count_test.cc builder /top//boost_unit_test_framework ;
 unit-test adjust_counts_test : adjust_counts_test.cc builder /top//boost_unit_test_framework ;
diff --git a/lm/builder/dump_counts_main.cc b/lm/builder/dump_counts_main.cc
new file mode 100644
index 000000000..fa0016792
--- /dev/null
+++ b/lm/builder/dump_counts_main.cc
@@ -0,0 +1,36 @@
+#include "lm/builder/print.hh"
+#include "lm/word_index.hh"
+#include "util/file.hh"
+#include "util/read_compressed.hh"
+
+#include <boost/lexical_cast.hpp>
+#include <cstring>
+#include <iostream>
+#include <vector>
+
+int main(int argc, char *argv[]) {  // Prints each binary count record as "word ... word count"; returns 1 on bad usage.
+  if (argc != 4) {
+    std::cerr << "Usage: " << argv[0] << " counts vocabulary order\n"
+    "The counts file contains records with 4-byte vocabulary ids followed by 8-byte\n"
+    "counts.  Each record has order many vocabulary ids.\n"
+    "The vocabulary file contains the words delimited by NULL in order of id.\n"
+    "The vocabulary file may not be compressed because it is mmapped but the counts\nfile can be compressed.\n";
+    return 1;
+  }
+  util::ReadCompressed counts(util::OpenReadOrThrow(argv[1]));
+  util::scoped_fd vocab_file(util::OpenReadOrThrow(argv[2]));
+  lm::builder::VocabReconstitute vocab(vocab_file.get());
+  const unsigned int order = boost::lexical_cast<unsigned int>(argv[3]);
+  std::vector<char> record(sizeof(uint32_t) * order + sizeof(uint64_t));  // Holds one record: order ids, then the count.
+  while (std::size_t got = counts.ReadOrEOF(&*record.begin(), record.size())) {  // A zero-byte read ends the loop.
+    UTIL_THROW_IF(got != record.size(), util::Exception, "Read " << got << " bytes at the end of file, which is not a complete record of length " << record.size());
+    const lm::WordIndex *words = reinterpret_cast<const lm::WordIndex*>(&*record.begin());
+    for (const lm::WordIndex *i = words; i != words + order; ++i) {
+      UTIL_THROW_IF(*i >= vocab.Size(), util::Exception, "Vocab ID " << *i << " is larger than the vocab file's maximum of " << vocab.Size() << ".  Are you sure you have the right order and vocab file for these counts?");
+      std::cout << vocab.Lookup(*i) << ' ';
+    }
+    uint64_t count;  // Copied out rather than dereferenced in place: with 4-byte ids the count is not 8-byte aligned when order is odd.
+    std::memcpy(&count, words + order, sizeof(count));
+    std::cout << count << '\n';  // TODO don't use std::cout because it is slow.  Add fast uint64_t printing support to FakeOFStream.
+  }
+}
diff --git a/lm/builder/print.cc b/lm/builder/print.cc
index c70e62ed6..75f15f0a6 100644
--- a/lm/builder/print.cc
+++ b/lm/builder/print.cc
@@ -54,9 +54,8 @@ void PrintARPA::Run(const util::stream::ChainPositions &positions) {
       for (const WordIndex *i = stream->begin() + 1; i != stream->end(); ++i) {
         out << ' ' << vocab_.Lookup(*i);
       }
-      float backoff = stream->Value().complete.backoff;
-      if (backoff != 0.0)
-        out << '\t' << backoff;
+      if (order != positions.size())
+        out << '\t' << stream->Value().complete.backoff;
       out << '\n';
     
     }
diff --git a/lm/ngram_query.hh b/lm/ngram_query.hh
index 9e32d113a..5f330c5cc 100644
--- a/lm/ngram_query.hh
+++ b/lm/ngram_query.hh
@@ -36,7 +36,7 @@ struct FullPrint : public BasicPrint {
       "Perplexity including OOVs:\t" << ppl_including_oov << "\n"
       "Perplexity excluding OOVs:\t" << ppl_excluding_oov << "\n"
       "OOVs:\t" << corpus_oov << "\n"
-      "Tokenss:\t" << corpus_tokens << '\n'
+      "Tokens:\t" << corpus_tokens << '\n'
       ;
   }
 };
diff --git a/moses/LM/NeuralLMWrapper.cpp b/moses/LM/NeuralLMWrapper.cpp
index 9411bd2c4..ab7b5400b 100644
--- a/moses/LM/NeuralLMWrapper.cpp
+++ b/moses/LM/NeuralLMWrapper.cpp
@@ -4,7 +4,6 @@
 #include <boost/functional/hash.hpp>
 #include "NeuralLMWrapper.h"
 #include "neuralLM.h"
-#include <model.h>
 
 using namespace std;
 
@@ -34,7 +33,6 @@ void NeuralLMWrapper::Load()
   m_sentenceEndWord[m_factorType] = m_sentenceEnd;
 
   m_neuralLM_shared = new nplm::neuralLM(m_filePath, true);
-  m_neuralLM_shared->set_log_base(10);
   //TODO: config option?
   m_neuralLM_shared->set_cache(1000000);
 
@@ -56,7 +54,7 @@ LMResult NeuralLMWrapper::GetValue(const vector<const Word*> &contextFactor, Sta
   for (size_t i=0, n=contextFactor.size(); i<n; i++) {
     const Word* word = contextFactor[i];
     const Factor* factor = word->GetFactor(m_factorType);
-    const std::string string= factor->GetString().as_string();
+    const std::string string = factor->GetString().as_string();
     int neuralLM_wordID = m_neuralLM->lookup_word(string);
     words[i] = neuralLM_wordID;
     boost::hash_combine(hashCode, neuralLM_wordID);
@@ -66,7 +64,7 @@ LMResult NeuralLMWrapper::GetValue(const vector<const Word*> &contextFactor, Sta
 
   // Create a new struct to hold the result
   LMResult ret;
-  ret.score = value;
+  ret.score = FloorScore(value);
   ret.unknown = false;
 
   (*finalState) = (State*) hashCode;