diff options
-rw-r--r-- | Jamroot | 3 | ||||
-rw-r--r-- | contrib/other-builds/moses/.project | 20 | ||||
-rw-r--r-- | moses/FF/Factory.cpp | 11 | ||||
-rw-r--r-- | moses/LM/Ken.cpp | 14 | ||||
-rw-r--r-- | moses/LM/Ken.h | 5 | ||||
-rw-r--r-- | moses/LM/Reloading.cpp | 112 | ||||
-rw-r--r-- | moses/LM/Reloading.h | 220 | ||||
-rw-r--r-- | moses/TranslationModel/UG/Jamfile | 10 | ||||
-rw-r--r-- | moses/TranslationModel/UG/filter-pt.cc | 669 | ||||
-rw-r--r-- | moses/TranslationModel/UG/mmsapt.cpp | 2 | ||||
-rw-r--r-- | moses/TrellisPath.cpp | 10 | ||||
-rwxr-xr-x | run-regtests.sh | 8 | ||||
-rw-r--r-- | util/file_stream.hh | 1 |
13 files changed, 1044 insertions, 41 deletions
@@ -208,7 +208,7 @@ if [ option.get "with-icu" : : "yes" ] # for probing pt external-lib boost_serialization ; -requirements += <library>boost_serialization ; +requirements += <library>boost_serialization/<runtime-link>static ; if [ option.get "with-vw" ] { requirements += <define>HAVE_VW ; @@ -247,6 +247,7 @@ if [ option.get "with-mm-extras" : : "yes" ] moses/TranslationModel/UG//bitext-find moses/TranslationModel/UG//ptable-describe-features moses/TranslationModel/UG//count-ptable-features + moses/TranslationModel/UG//ptable-sigtest-filter moses/TranslationModel/UG//ptable-lookup moses/TranslationModel/UG//ptable-lookup-corpus moses/TranslationModel/UG//check-coverage diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project index 32bfa1927..e8651529d 100644 --- a/contrib/other-builds/moses/.project +++ b/contrib/other-builds/moses/.project @@ -1636,16 +1636,6 @@ <locationURI>PARENT-3-PROJECT_LOC/moses/FF/TargetNgramFeature.h</locationURI> </link> <link> - <name>FF/TargetPreferencesFeature.cpp</name> - <type>1</type> - <locationURI>PARENT-3-PROJECT_LOC/moses/FF/TargetPreferencesFeature.cpp</locationURI> - </link> - <link> - <name>FF/TargetPreferencesFeature.h</name> - <type>1</type> - <locationURI>PARENT-3-PROJECT_LOC/moses/FF/TargetPreferencesFeature.h</locationURI> - </link> - <link> <name>FF/TargetWordInsertionFeature.cpp</name> <type>1</type> <locationURI>PARENT-3-PROJECT_LOC/moses/FF/TargetWordInsertionFeature.cpp</locationURI> @@ -2006,16 +1996,6 @@ <locationURI>PARENT-3-PROJECT_LOC/moses/PP/SpanLengthPhraseProperty.h</locationURI> </link> <link> - <name>PP/TargetPreferencesPhraseProperty.cpp</name> - <type>1</type> - <locationURI>PARENT-3-PROJECT_LOC/moses/PP/TargetPreferencesPhraseProperty.cpp</locationURI> - </link> - <link> - <name>PP/TargetPreferencesPhraseProperty.h</name> - <type>1</type> - <locationURI>PARENT-3-PROJECT_LOC/moses/PP/TargetPreferencesPhraseProperty.h</locationURI> - </link> - <link> 
<name>PP/TreeStructurePhraseProperty.h</name> <type>1</type> <locationURI>PARENT-3-PROJECT_LOC/moses/PP/TreeStructurePhraseProperty.h</locationURI> diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp index e44c5c509..c2d8d3363 100644 --- a/moses/FF/Factory.cpp +++ b/moses/FF/Factory.cpp @@ -93,6 +93,7 @@ #endif #include "moses/LM/Ken.h" +#include "moses/LM/Reloading.h" #ifdef LM_IRST #include "moses/LM/IRST.h" #endif @@ -203,6 +204,14 @@ public: } }; +class ReloadingFactory : public FeatureFactory +{ +public: + void Create(const std::string &line) { + DefaultSetup(ConstructReloadingLM(line)); + } +}; + } // namespace FeatureRegistry::FeatureRegistry() @@ -332,7 +341,7 @@ FeatureRegistry::FeatureRegistry() MOSES_FNAME2("OxSourceFactoredLM", SourceOxLM); MOSES_FNAME2("OxTreeLM", OxLM<oxlm::FactoredTreeLM>); #endif - + Add("ReloadingLM", new ReloadingFactory()); Add("KENLM", new KenFactory()); } diff --git a/moses/LM/Ken.cpp b/moses/LM/Ken.cpp index 428640290..c81f3b859 100644 --- a/moses/LM/Ken.cpp +++ b/moses/LM/Ken.cpp @@ -148,12 +148,8 @@ private: } // namespace -template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy) - :LanguageModel(line) - ,m_factorType(factorType) +template <class Model> void LanguageModelKen<Model>::LoadModel(const std::string &file, bool lazy) { - ReadParameters(); - lm::ngram::Config config; if(this->m_verbosity >= 1) { config.messages = &std::cerr; @@ -170,6 +166,14 @@ template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::stri m_beginSentenceFactor = collection.AddFactor(BOS_); } +template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy) + :LanguageModel(line) + ,m_factorType(factorType) +{ + ReadParameters(); + LoadModel(file, lazy); +} + template <class Model> LanguageModelKen<Model>::LanguageModelKen(const LanguageModelKen<Model> 
&copy_from) :LanguageModel(copy_from.GetArgLine()), m_ngram(copy_from.m_ngram), diff --git a/moses/LM/Ken.h b/moses/LM/Ken.h index 73a957e93..3a94e4c0b 100644 --- a/moses/LM/Ken.h +++ b/moses/LM/Ken.h @@ -73,11 +73,15 @@ protected: FactorType m_factorType; + void LoadModel(const std::string &file, bool lazy); + lm::WordIndex TranslateID(const Word &word) const { std::size_t factor = word.GetFactor(m_factorType)->GetId(); return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]); } + std::vector<lm::WordIndex> m_lmIdLookup; + private: LanguageModelKen(const LanguageModelKen<Model> &copy_from); @@ -96,7 +100,6 @@ private: } } - std::vector<lm::WordIndex> m_lmIdLookup; protected: //bool m_oovFeatureEnabled; /// originally from LanguageModel, copied here to separate the interfaces. Called m_enableOOVFeature there diff --git a/moses/LM/Reloading.cpp b/moses/LM/Reloading.cpp new file mode 100644 index 000000000..0f9d80a70 --- /dev/null +++ b/moses/LM/Reloading.cpp @@ -0,0 +1,112 @@ +// $Id$ + +/*********************************************************************** +Moses - factored phrase-based language decoder +Copyright (C) 2006 University of Edinburgh + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. 
+ +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ + +#include "lm/binary_format.hh" +#include "lm/enumerate_vocab.hh" +#include "lm/left.hh" +#include "lm/model.hh" + +#include "moses/FF/FFState.h" +#include "moses/Hypothesis.h" +#include "moses/Phrase.h" + +#include "moses/LM/Ken.h" +#include "moses/LM/Reloading.h" +#include "util/exception.hh" + +//#include "moses/Util.h" +//#include "moses/StaticData.h" +//#include <iostream> +/* +namespace Moses +{ +namespace +{ + +struct ReloadingLMState : public FFState { + lm::ngram::State state; + virtual size_t hash() const { + return 0; + } + virtual bool operator==(const FFState& o) const { + return true; + } + +}; +} // namespace + + +template <class Model> ReloadingLanguageModel<Model>::ReloadingLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(line,file,factorType,lazy) +{ + // + // This space intentionally left blank + // +} +template <class Model> const FFState *ReloadingLanguageModel<Model>::EmptyHypothesisState(const InputType &input) const +{ + ReloadingLMState *ret = new ReloadingLMState(); + ret->state = m_ngram->BeginSentenceState(); + return ret; +} + + +template <class Model> FFState *ReloadingLanguageModel<Model>::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const +{ + + std::auto_ptr<FFState> kenlmState(LanguageModelKen<Model>::EvaluateWhenApplied(hypo, ps, out)); + const lm::ngram::State &out_state = static_cast<const ReloadingLMState&>(*kenlmState).state; + + + std::auto_ptr<ReloadingLMState> ret(new ReloadingLMState()); + ret->state = out_state; + + kenlmState.release(); + return ret.release(); +} + + +LanguageModel *ConstructReloadingLM(const 
std::string &line, const std::string &file, FactorType factorType, bool lazy) +{ + lm::ngram::ModelType model_type; + if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) { + switch(model_type) { + case lm::ngram::PROBING: + return new ReloadingLanguageModel<lm::ngram::ProbingModel>(line, file, factorType, lazy); + case lm::ngram::REST_PROBING: + return new ReloadingLanguageModel<lm::ngram::RestProbingModel>(line, file, factorType, lazy); + case lm::ngram::TRIE: + return new ReloadingLanguageModel<lm::ngram::TrieModel>(line, file, factorType, lazy); + case lm::ngram::QUANT_TRIE: + return new ReloadingLanguageModel<lm::ngram::QuantTrieModel>(line, file, factorType, lazy); + case lm::ngram::ARRAY_TRIE: + return new ReloadingLanguageModel<lm::ngram::ArrayTrieModel>(line, file, factorType, lazy); + case lm::ngram::QUANT_ARRAY_TRIE: + return new ReloadingLanguageModel<lm::ngram::QuantArrayTrieModel>(line, file, factorType, lazy); + default: + UTIL_THROW2("Unrecognized kenlm model type " << model_type); + } + } else { + return new ReloadingLanguageModel<lm::ngram::ProbingModel>(line, file, factorType, lazy); + } +} + +} // namespace Moses +*/ diff --git a/moses/LM/Reloading.h b/moses/LM/Reloading.h new file mode 100644 index 000000000..3993fe9d7 --- /dev/null +++ b/moses/LM/Reloading.h @@ -0,0 +1,220 @@ +// $Id$ + +/*********************************************************************** +Moses - factored phrase-based language decoder +Copyright (C) 2006 University of Edinburgh + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +***********************************************************************/ + +#ifndef moses_LanguageModelReloading_h +#define moses_LanguageModelReloading_h + +#include <string> + +#include "moses/LM/Base.h" +#include "moses/LM/Ken.h" + +#include "util/tokenize_piece.hh" +#include "util/string_stream.hh" + +#include <iostream> +namespace Moses +{ + +class FFState; + +//LanguageModel *ConstructReloadingLM(const std::string &line); +//LanguageModel *ConstructReloadingLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy); +/* + namespace { +class MappingBuilder : public lm::EnumerateVocab +{ +public: + MappingBuilder(FactorCollection &factorCollection, std::vector<lm::WordIndex> &mapping) + : m_factorCollection(factorCollection), m_mapping(mapping) {} + + void Add(lm::WordIndex index, const StringPiece &str) { + std::size_t factorId = m_factorCollection.AddFactor(str)->GetId(); + if (m_mapping.size() <= factorId) { + // 0 is <unk> :-) + m_mapping.resize(factorId + 1); + } + m_mapping[factorId] = index; + } + +private: + FactorCollection &m_factorCollection; + std::vector<lm::WordIndex> &m_mapping; +}; + } +*/ +template <class Model> class ReloadingLanguageModel : public LanguageModelKen<Model> +{ +public: + + ReloadingLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(line, file, factorType, lazy), m_file(file), m_lazy(lazy) { + + std::cerr << "ReloadingLM constructor: " << m_file << std::endl; + // std::cerr << std::string(line).replace(0,11,"KENLM") << std::endl; + + } + + virtual void InitializeForInput(ttasksptr const& ttask) { + std::cerr << "ReloadingLM InitializeForInput" << std::endl; + 
LanguageModelKen<Model>::LoadModel(m_file, m_lazy); + /* + lm::ngram::Config config; + if(this->m_verbosity >= 1) { + config.messages = &std::cerr; + } else { + config.messages = NULL; + } + FactorCollection &collection = FactorCollection::Instance(); + MappingBuilder builder(collection, m_lmIdLookup); + config.enumerate_vocab = &builder; + config.load_method = m_lazy ? util::LAZY : util::POPULATE_OR_READ; + + m_ngram.reset(new Model(m_file.c_str(), config)); + + m_beginSentenceFactor = collection.AddFactor(BOS_); + */ + }; + + /* + ReloadingLanguageModel(const std::string &line) : LanguageModelKen<Model>(ConstructKenLM(std::string(line).replace(0,11,"KENLM"))) { + std::cerr << "ReloadingLM constructor" << std::endl; + std::cerr << std::string(line).replace(0,11,"KENLM") << std::endl; + } + */ + /* + ~ReloadingLanguageModel() { + delete m_lm; + } + + virtual const FFState *EmptyHypothesisState(const InputType &input) const { + return m_lm->EmptyHypothesisState(input); + } + + virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const { + m_lm->CalcScore(phrase, fullScore, ngramScore, oovCount); + } + + virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const { + return m_lm->EvaluateWhenApplied(hypo, ps, out); + } + + virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const { + return m_lm->EvaluateWhenApplied(cur_hypo, featureID, accumulator); + } + + virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const { + return m_lm->EvaluateWhenApplied(hyperedge, featureID, accumulator); + } + + virtual void IncrementalCallback(Incremental::Manager &manager) const { + m_lm->IncrementalCallback(manager); + } + + virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const { + m_lm->ReportHistoryOrder(out, 
phrase); + } + + virtual bool IsUseable(const FactorMask &mask) const { + return m_lm->IsUseable(mask); + } + + + private: + + LanguageModel *m_lm; + */ + +protected: + + using LanguageModelKen<Model>::m_ngram; + using LanguageModelKen<Model>::m_lmIdLookup; + using LanguageModelKen<Model>::m_beginSentenceFactor; + + const std::string m_file; + bool m_lazy; +}; + + +LanguageModel *ConstructReloadingLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy) +{ + lm::ngram::ModelType model_type; + if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) { + switch(model_type) { + case lm::ngram::PROBING: + return new ReloadingLanguageModel<lm::ngram::ProbingModel>(line, file, factorType, lazy); + case lm::ngram::REST_PROBING: + return new ReloadingLanguageModel<lm::ngram::RestProbingModel>(line, file, factorType, lazy); + case lm::ngram::TRIE: + return new ReloadingLanguageModel<lm::ngram::TrieModel>(line, file, factorType, lazy); + case lm::ngram::QUANT_TRIE: + return new ReloadingLanguageModel<lm::ngram::QuantTrieModel>(line, file, factorType, lazy); + case lm::ngram::ARRAY_TRIE: + return new ReloadingLanguageModel<lm::ngram::ArrayTrieModel>(line, file, factorType, lazy); + case lm::ngram::QUANT_ARRAY_TRIE: + return new ReloadingLanguageModel<lm::ngram::QuantArrayTrieModel>(line, file, factorType, lazy); + default: + UTIL_THROW2("Unrecognized kenlm model type " << model_type); + } + } else { + return new ReloadingLanguageModel<lm::ngram::ProbingModel>(line, file, factorType, lazy); + } +} + +LanguageModel *ConstructReloadingLM(const std::string &lineOrig) +{ + FactorType factorType = 0; + std::string filePath; + bool lazy = false; + + util::TokenIter<util::SingleCharacter, true> argument(lineOrig, ' '); + ++argument; // KENLM + + util::StringStream line; + line << "KENLM"; + + for (; argument; ++argument) { + const char *equals = std::find(argument->data(), argument->data() + argument->size(), '='); + UTIL_THROW_IF2(equals == 
argument->data() + argument->size(), + "Expected = in ReloadingLM argument " << *argument); + StringPiece name(argument->data(), equals - argument->data()); + StringPiece value(equals + 1, argument->data() + argument->size() - equals - 1); + if (name == "factor") { + factorType = boost::lexical_cast<FactorType>(value); + } else if (name == "order") { + // Ignored + } else if (name == "path") { + filePath.assign(value.data(), value.size()); + } else if (name == "lazyken") { + lazy = boost::lexical_cast<bool>(value); + } else { + // pass to base class to interpret + line << " " << name << "=" << value; + } + } + + return ConstructReloadingLM(line.str(), filePath, factorType, lazy); +} + + +} // namespace Moses + +#endif + diff --git a/moses/TranslationModel/UG/Jamfile b/moses/TranslationModel/UG/Jamfile index d41e0f5ca..34e0b6663 100644 --- a/moses/TranslationModel/UG/Jamfile +++ b/moses/TranslationModel/UG/Jamfile @@ -1,3 +1,13 @@ +exe ptable-sigtest-filter : +filter-pt.cc +$(TOP)/moses//moses +$(TOP)/moses/TranslationModel/UG/generic//generic +$(TOP)//boost_iostreams +$(TOP)//boost_program_options +$(TOP)/moses/TranslationModel/UG/mm//mm +$(TOP)/moses/TranslationModel/UG//mmsapt +$(TOP)/util//kenutil +; exe try-align : try-align.cc $(TOP)/moses//moses diff --git a/moses/TranslationModel/UG/filter-pt.cc b/moses/TranslationModel/UG/filter-pt.cc new file mode 100644 index 000000000..cb288d534 --- /dev/null +++ b/moses/TranslationModel/UG/filter-pt.cc @@ -0,0 +1,669 @@ +// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*- +// significance filtering for phrase tables as described in +// H. Johnson, et al. (2007) Improving Translation Quality +// by Discarding Most of the Phrasetable. EMNLP 2007. 
+// Implemented by Marcin Junczys-Dowmunt +// recommended use: -l a+e -n <ttable-limit> +#include <cstring> +#include <cassert> +#include <cstdio> +#include <cstdlib> +#include <algorithm> +#include <fstream> +#include <sstream> + +#include <vector> +#include <iostream> +#include <set> + +#include <boost/thread/tss.hpp> +#include <boost/thread.hpp> +#include <boost/unordered_map.hpp> +#include <boost/program_options.hpp> +#include <boost/shared_ptr.hpp> +#include <boost/foreach.hpp> + +#ifdef WIN32 +#include "WIN32_functions.h" +#else +#include <unistd.h> +#endif + +#include "mm/ug_bitext.h" + +// constants +const size_t MINIMUM_SIZE_TO_KEEP = 10000; // increase this to improve memory usage, +// reduce for speed +const std::string SEPARATOR = " ||| "; + +const double ALPHA_PLUS_EPS = -1000.0; // dummy value +const double ALPHA_MINUS_EPS = -2000.0; // dummy value + +// configuration params +int pfe_filter_limit = 0; // 0 = don't filter anything based on P(f|e) +bool print_cooc_counts = false; // add cooc counts to phrase table? +bool print_neglog_significance = false; // add -log(p) to phrase table? 
+double sig_filter_limit = 0; // keep phrase pairs with -log(sig) > sig_filter_limit +// higher = filter-more +bool pef_filter_only = false; // only filter based on pef +bool hierarchical = false; + +double p_111 = 0.0; // alpha +size_t pt_lines = 0; +size_t nremoved_sigfilter = 0; +size_t nremoved_pfefilter = 0; + +typedef sapt::L2R_Token<sapt::SimpleWordId> Token; +typedef sapt::mmTtrack<Token> ttrack_t; +typedef sapt::mmTSA<Token> tsa_t; +typedef sapt::TokenIndex tind_t; + +int num_lines; + +boost::mutex in_mutex; +boost::mutex out_mutex; +boost::mutex err_mutex; + +typedef size_t TextLenType; + +typedef boost::shared_ptr<std::vector<TextLenType> > SentIdSet; + +class Cache { + typedef std::pair<SentIdSet, clock_t> ClockedSet; + typedef boost::unordered_map<std::string, ClockedSet> ClockedMap; + + public: + + SentIdSet get(const std::string& phrase) { + boost::shared_lock<boost::shared_mutex> lock(m_mutex); + if(m_cont.count(phrase)) { + ClockedSet& set = m_cont[phrase]; + set.second = clock(); + return set.first; + } + return SentIdSet( new SentIdSet::element_type() ); + } + + void put(const std::string& phrase, const SentIdSet set) { + boost::unique_lock<boost::shared_mutex> lock(m_mutex); + m_cont[phrase] = std::make_pair(set, clock()); + } + + static void set_max_cache(size_t max_cache) { + s_max_cache = max_cache; + } + + void prune() { + if(s_max_cache > 0) { + boost::upgrade_lock<boost::shared_mutex> lock(m_mutex); + if(m_cont.size() > s_max_cache) { + std::vector<clock_t> clocks; + for(ClockedMap::iterator it = m_cont.begin(); it != m_cont.end(); it++) + clocks.push_back(it->second.second); + + std::sort(clocks.begin(), clocks.end()); + clock_t out = clocks[m_cont.size() - s_max_cache]; + + boost::upgrade_to_unique_lock<boost::shared_mutex> uniq_lock(lock); + for(ClockedMap::iterator it = m_cont.begin(); it != m_cont.end(); it++) + if(it->second.second < out) + m_cont.erase(it); + } + } + } + + private: + ClockedMap m_cont; + boost::shared_mutex m_mutex; 
+ static size_t s_max_cache; +}; + +size_t Cache::s_max_cache = 0; + +struct SA { + tind_t V; + boost::shared_ptr<ttrack_t> T; + tsa_t I; + Cache cache; +}; + +std::vector<boost::shared_ptr<SA> > e_sas; +std::vector<boost::shared_ptr<SA> > f_sas; + +#undef min + +void usage() +{ + std::cerr << "\nFilter phrase table using significance testing as described\n" + << "in H. Johnson, et al. (2007) Improving Translation Quality\n" + << "by Discarding Most of the Phrasetable. EMNLP 2007.\n"; +} + +struct PTEntry { + PTEntry(const std::string& str, int index); + std::string f_phrase; + std::string e_phrase; + std::string extra; + std::string scores; + float pfe; + int cf; + int ce; + int cfe; + float nlog_pte; + void set_cooc_stats(int _cef, int _cf, int _ce, float nlp) { + cfe = _cef; + cf = _cf; + ce = _ce; + nlog_pte = nlp; + } + +}; + +PTEntry::PTEntry(const std::string& str, int index) : + cf(0), ce(0), cfe(0), nlog_pte(0.0) +{ + size_t pos = 0; + std::string::size_type nextPos = str.find(SEPARATOR, pos); + this->f_phrase = str.substr(pos,nextPos); + + pos = nextPos + SEPARATOR.size(); + nextPos = str.find(SEPARATOR, pos); + this->e_phrase = str.substr(pos,nextPos-pos); + + pos = nextPos + SEPARATOR.size(); + nextPos = str.find(SEPARATOR, pos); + if (nextPos < str.size()) { + this->scores = str.substr(pos,nextPos-pos); + + pos = nextPos + SEPARATOR.size(); + this->extra = str.substr(pos); + } + else { + this->scores = str.substr(pos,str.size()-pos); + } + + int c = 0; + std::string::iterator i=scores.begin(); + if (index > 0) { + for (; i != scores.end(); ++i) { + if ((*i) == ' ') { + c++; + if (c == index) break; + } + } + } + if (i != scores.end()) { + ++i; + } + char f[24]; + char *fp=f; + while (i != scores.end() && *i != ' ') { + *fp++=*i++; + } + *fp++=0; + + this->pfe = atof(f); +} + +struct PfeComparer { + bool operator()(const PTEntry* a, const PTEntry* b) const { + return a->pfe > b->pfe; + } +}; + +struct NlogSigThresholder { + NlogSigThresholder(float 
threshold) : t(threshold) {} + float t; + bool operator()(const PTEntry* a) const { + if (a->nlog_pte < t) { + delete a; + return true; + } else return false; + } +}; + +std::ostream& operator << (std::ostream& os, const PTEntry& pp) +{ + os << pp.f_phrase << " ||| " << pp.e_phrase; + os << " ||| " << pp.scores; + if (pp.extra.size()>0) os << " ||| " << pp.extra; + if (print_cooc_counts) os << " ||| " << pp.cfe << " " << pp.cf << " " << pp.ce; + if (print_neglog_significance) os << " ||| " << pp.nlog_pte; + return os; +} + +void print(int a, int b, int c, int d, float p) +{ + std::cerr << a << "\t" << b << "\t P=" << p << "\n" + << c << "\t" << d << "\t xf=" + << (double)(b)*(double)(c)/(double)(a+1)/(double)(d+1) << "\n\n"; +} + +// 2x2 (one-sided) Fisher's exact test +// see B. Moore. (2004) On Log Likelihood and the Significance of Rare Events +double fisher_exact(int cfe, int ce, int cf) +{ + assert(cfe <= ce); + assert(cfe <= cf); + + int a = cfe; + int b = (cf - cfe); + int c = (ce - cfe); + int d = (num_lines - ce - cf + cfe); + int n = a + b + c + d; + + double cp = exp(lgamma(1+a+c) + lgamma(1+b+d) + lgamma(1+a+b) + lgamma(1+c+d) + - lgamma(1+n) - lgamma(1+a) - lgamma(1+b) - lgamma(1+c) + - lgamma(1+d)); + double total_p = 0.0; + int tc = std::min(b,c); + for (int i=0; i<=tc; i++) { + total_p += cp; + double coef = (double)(b)*(double)(c)/(double)(a+1)/(double)(d+1); + cp *= coef; + ++a; + --c; + ++d; + --b; + } + return total_p; +} + +template <class setType> +void ordered_set_intersect(setType& out, const setType set_1, const setType set_2) +{ + std::set_intersection(set_1->begin(), set_1->end(), set_2->begin(), + set_2->end(), inserter(*out, out->begin()) ); +} + + +void lookup_phrase(SentIdSet& ids, const std::string& phrase, + tsa_t &my_sa, tind_t &my_v, Cache& cache) +{ + ids = cache.get(phrase); + if(ids->empty()) { + + std::vector<sapt::id_type> snt; + my_v.fillIdSeq(phrase, snt); + + tsa_t::tree_iterator m(&my_sa); + size_t k = 0; + while (k < 
snt.size() && m.extend(snt[k])) ++k; + if(k == snt.size()) { + ids->reserve(m.approxOccurrenceCount()+10); + sapt::tsa::ArrayEntry I(m.lower_bound(-1)); + char const* stop = m.upper_bound(-1); + do { + m.root->readEntry(I.next,I); + ids->push_back(I.sid); + } while (I.next != stop); + + std::sort(ids->begin(), ids->end()); + SentIdSet::element_type::iterator it = + std::unique(ids->begin(), ids->end()); + ids->resize(it - ids->begin()); + + if(ids->size() >= MINIMUM_SIZE_TO_KEEP) + cache.put(phrase, ids); + } + } +} + +void lookup_multiple_phrases(SentIdSet& ids, std::vector<std::string> & phrases, + tsa_t & my_sa, tind_t &my_v, + const std::string & rule, Cache& cache) +{ + + if (phrases.size() == 1) { + lookup_phrase(ids, phrases.front(), my_sa, my_v, cache); + } + else { + SentIdSet main_set( new SentIdSet::element_type() ); + bool first = true; + SentIdSet first_set( new SentIdSet::element_type() ); + lookup_phrase(first_set, phrases.front(), my_sa, my_v, cache); + for (std::vector<std::string>::iterator phrase=phrases.begin()+1; + phrase != phrases.end(); ++phrase) { + SentIdSet temp_set( new SentIdSet::element_type() ); + lookup_phrase(temp_set, *phrase, my_sa, my_v, cache); + if (first) { + ordered_set_intersect(main_set, first_set, temp_set); + first = false; + } + else { + SentIdSet new_set( new SentIdSet::element_type() ); + ordered_set_intersect(new_set, main_set, temp_set); + main_set->swap(*new_set); + } + } + ids->swap(*main_set); + } +} + + +void find_occurrences(SentIdSet& ids, const std::string& rule, + tsa_t& my_sa, tind_t &my_v, Cache& cache) +{ + // we search for hierarchical rules by stripping away NT and looking for terminals sequences + // if a rule contains multiple sequences of terminals, we intersect their occurrences. 
+ if (hierarchical) { + // std::cerr << "splitting up phrase: " << phrase << "\n"; + int pos = 0; + int NTStartPos, NTEndPos; + std::vector<std::string> phrases; + while (rule.find("] ", pos) < rule.size()) { + NTStartPos = rule.find("[",pos) - 1; // -1 to cut space before NT + NTEndPos = rule.find("] ",pos); + if (NTStartPos < pos) { // no space: NT at start of rule (or two consecutive NTs) + pos = NTEndPos + 2; + continue; + } + phrases.push_back(rule.substr(pos,NTStartPos-pos)); + pos = NTEndPos + 2; + } + + NTStartPos = rule.find("[",pos) - 1; // LHS of rule + if (NTStartPos > pos) { + phrases.push_back(rule.substr(pos,NTStartPos-pos)); + } + + lookup_multiple_phrases(ids, phrases, my_sa, my_v, rule, cache); + } + else { + lookup_phrase(ids, rule, my_sa, my_v, cache); + } +} + + +// input: unordered list of translation options for a single source phrase +void compute_cooc_stats_and_filter(std::vector<PTEntry*>& options) +{ + if (pfe_filter_limit > 0 && options.size() > pfe_filter_limit) { + nremoved_pfefilter += (options.size() - pfe_filter_limit); + std::nth_element(options.begin(), options.begin() + pfe_filter_limit, + options.end(), PfeComparer()); + for (std::vector<PTEntry*>::iterator i = options.begin() + pfe_filter_limit; + i != options.end(); ++i) + delete *i; + options.erase(options.begin() + pfe_filter_limit,options.end()); + } + + if (pef_filter_only) + return; + + if (options.empty()) + return; + + size_t cf = 0; + std::vector<SentIdSet> fsets; + BOOST_FOREACH(boost::shared_ptr<SA>& f_sa, f_sas) { + fsets.push_back( boost::shared_ptr<SentIdSet::element_type>(new SentIdSet::element_type()) ); + find_occurrences(fsets.back(), options.front()->f_phrase, f_sa->I, f_sa->V, f_sa->cache); + cf += fsets.back()->size(); + } + + for (std::vector<PTEntry*>::iterator i = options.begin(); + i != options.end(); ++i) { + const std::string& e_phrase = (*i)->e_phrase; + + size_t ce = 0; + std::vector<SentIdSet> esets; + BOOST_FOREACH(boost::shared_ptr<SA>& e_sa, 
e_sas) { + esets.push_back( boost::shared_ptr<SentIdSet::element_type>(new SentIdSet::element_type()) ); + find_occurrences(esets.back(), e_phrase, e_sa->I, e_sa->V, e_sa->cache); + ce += esets.back()->size(); + } + + size_t cef = 0; + for(size_t j = 0; j < fsets.size(); ++j) { + SentIdSet efset( new SentIdSet::element_type() ); + ordered_set_intersect(efset, fsets[j], esets[j]); + cef += efset->size(); + } + + double nlp = -log(fisher_exact(cef, cf, ce)); + (*i)->set_cooc_stats(cef, cf, ce, nlp); + } + + std::vector<PTEntry*>::iterator new_end = + std::remove_if(options.begin(), options.end(), + NlogSigThresholder(sig_filter_limit)); + nremoved_sigfilter += (options.end() - new_end); + options.erase(new_end,options.end()); +} + +void filter_thread(std::istream* in, std::ostream* out, int pfe_index) { + + std::vector<std::string> lines; + std::string prev = ""; + std::vector<PTEntry*> options; + while(true) { + { + boost::mutex::scoped_lock lock(in_mutex); + if(in->eof()) + break; + + lines.clear(); + std::string line; + while(getline(*in, line) && lines.size() < 500000) + lines.push_back(line); + } + + std::stringstream out_temp; + for(std::vector<std::string>::iterator it = lines.begin(); it != lines.end(); it++) { + size_t tmp_lines = ++pt_lines; + if(tmp_lines % 10000 == 0) { + boost::mutex::scoped_lock lock(err_mutex); + std::cerr << "."; + + if(tmp_lines % 500000 == 0) + std::cerr << "[n:" << tmp_lines << "]\n"; + + if(tmp_lines % 10000000 == 0) { + float pfefper = (100.0*(float)nremoved_pfefilter)/(float)pt_lines; + float sigfper = (100.0*(float)nremoved_sigfilter)/(float)pt_lines; + std::cerr << "------------------------------------------------------\n" + << " unfiltered phrases pairs: " << pt_lines << "\n" + << "\n" + << " P(f|e) filter [first]: " << nremoved_pfefilter << " (" << pfefper << "%)\n" + << " significance filter: " << nremoved_sigfilter << " (" << sigfper << "%)\n" + << " TOTAL FILTERED: " << (nremoved_pfefilter + nremoved_sigfilter) << " (" << 
(sigfper + pfefper) << "%)\n" + << "\n" + << " FILTERED phrase pairs: " << (pt_lines - nremoved_pfefilter - nremoved_sigfilter) << " (" << (100.0-sigfper - pfefper) << "%)\n" + << "------------------------------------------------------\n"; + } + } + + if(pt_lines % 10000 == 0) { + BOOST_FOREACH(boost::shared_ptr<SA> f_sa, f_sas) + f_sa->cache.prune(); + BOOST_FOREACH(boost::shared_ptr<SA> e_sa, e_sas) + e_sa->cache.prune(); + } + + if(it->length() > 0) { + PTEntry* pp = new PTEntry(it->c_str(), pfe_index); + if (prev != pp->f_phrase) { + prev = pp->f_phrase; + + if (!options.empty()) { // always true after first line + compute_cooc_stats_and_filter(options); + } + + for (std::vector<PTEntry*>::iterator i = options.begin(); + i != options.end(); ++i) { + out_temp << **i << '\n'; + delete *i; + } + + options.clear(); + options.push_back(pp); + + } else { + options.push_back(pp); + } + } + } + boost::mutex::scoped_lock lock(out_mutex); + *out << out_temp.str() << std::flush; + } + compute_cooc_stats_and_filter(options); + + boost::mutex::scoped_lock lock(out_mutex); + for (std::vector<PTEntry*>::iterator i = options.begin(); + i != options.end(); ++i) { + *out << **i << '\n'; + delete *i; + } + *out << std::flush; +} + +namespace po = boost::program_options; + +int main(int argc, char * argv[]) +{ + bool help; + std::vector<std::string> efiles; + std::vector<std::string> ffiles; + int pfe_index = 2; + int threads = 1; + size_t max_cache = 0; + std::string str_sig_filter_limit; + + po::options_description general("General options"); + general.add_options() + ("english,e", po::value<std::vector<std::string> >(&efiles)->multitoken(), + "english.suf-arr") + ("french,f", po::value<std::vector<std::string> >(&ffiles)->multitoken(), + "french.suf-arr") + ("pfe-index,i", po::value(&pfe_index)->default_value(2), + "Index of P(f|e) in phrase table") + ("pfe-filter-limit,n", po::value(&pfe_filter_limit)->default_value(0), + "0, 1...: 0=no filtering, >0 sort by P(e|f) and keep 
the top num elements") + ("threads,t", po::value(&threads)->default_value(1), + "number of threads to use") + ("max-cache,m", po::value(&max_cache)->default_value(0), + "limit cache to arg most recent phrases") + ("print-cooc,c", po::value(&print_cooc_counts)->zero_tokens()->default_value(false), + "add the coocurrence counts to the phrase table") + ("print-significance,p", po::value(&print_neglog_significance)->zero_tokens()->default_value(false), + "add -log(significance) to the phrase table") + ("hierarchical,x", po::value(&hierarchical)->zero_tokens()->default_value(false), + "filter hierarchical rule table") + ("sig-filter-limit,l", po::value(&str_sig_filter_limit), + ">0.0, a+e, or a-e: keep values that have a -log significance > this") + ("help,h", po::value(&help)->zero_tokens()->default_value(false), + "display this message") + ; + + po::options_description cmdline_options("Allowed options"); + cmdline_options.add(general); + po::variables_map vm; + + try { + po::store(po::command_line_parser(argc,argv). 
+ options(cmdline_options).run(), vm); + po::notify(vm); + } + catch (std::exception& e) { + std::cout << "Error: " << e.what() << std::endl << std::endl; + + usage(); + std::cout << cmdline_options << std::endl; + exit(0); + } + + if(vm["help"].as<bool>()) { + usage(); + std::cout << cmdline_options << std::endl; + exit(0); + } + + if(vm.count("pfe-filter-limit")) + std::cerr << "P(f|e) filter limit: " << pfe_filter_limit << std::endl; + if(vm.count("threads")) + std::cerr << "Using threads: " << threads << std::endl; + if(vm.count("max-cache")) + std::cerr << "Using max phrases in caches: " << max_cache << std::endl; + + if (strcmp(str_sig_filter_limit.c_str(),"a+e") == 0) { + sig_filter_limit = ALPHA_PLUS_EPS; + } else if (strcmp(str_sig_filter_limit.c_str(),"a-e") == 0) { + sig_filter_limit = ALPHA_MINUS_EPS; + } else { + char *x; + sig_filter_limit = strtod(str_sig_filter_limit.c_str(), &x); + if (sig_filter_limit < 0.0) { + std::cerr << "Filter limit (-l) must be either 'a+e', 'a-e' or a real number >= 0.0\n"; + usage(); + } + } + + if (sig_filter_limit == 0.0) pef_filter_only = true; + //----------------------------------------------------------------------------- + if (optind != argc || ((efiles.empty() || ffiles.empty()) && !pef_filter_only)) { + usage(); + } + + if (!pef_filter_only) { + size_t elines = 0; + BOOST_FOREACH(std::string& efile, efiles) { + e_sas.push_back(boost::shared_ptr<SA>(new SA())); + e_sas.back()->V.open(efile + ".tdx"); + e_sas.back()->T.reset(new ttrack_t()); + e_sas.back()->T->open(efile + ".mct"); + e_sas.back()->I.open(efile + ".sfa", e_sas.back()->T); + elines += e_sas.back()->T->size(); + } + + size_t flines = 0; + BOOST_FOREACH(std::string& ffile, ffiles) { + f_sas.push_back(boost::shared_ptr<SA>(new SA())); + f_sas.back()->V.open(ffile + ".tdx"); + f_sas.back()->T.reset(new ttrack_t()); + f_sas.back()->T->open(ffile + ".mct"); + f_sas.back()->I.open(ffile + ".sfa", f_sas.back()->T); + flines += f_sas.back()->T->size(); + } + 
+ if (elines != flines) { + std::cerr << "Number of lines in e-corpus != number of lines in f-corpus!\n"; + usage(); + exit(1); + } else { + std::cerr << "Training corpus: " << elines << " lines\n"; + num_lines = elines; + } + p_111 = -log(fisher_exact(1,1,1)); + std::cerr << "\\alpha = " << p_111 << "\n"; + if (sig_filter_limit == ALPHA_MINUS_EPS) { + sig_filter_limit = p_111 - 0.001; + } else if (sig_filter_limit == ALPHA_PLUS_EPS) { + sig_filter_limit = p_111 + 0.001; + } + std::cerr << "Sig filter threshold is = " << sig_filter_limit << "\n"; + } else { + std::cerr << "Filtering using P(e|f) only. n=" << pfe_filter_limit << std::endl; + } + + Cache::set_max_cache(max_cache); + std::ios_base::sync_with_stdio(false); + + boost::thread_group threadGroup; + for(int i = 0; i < threads; i++) + threadGroup.add_thread(new boost::thread(filter_thread, &std::cin, &std::cout, pfe_index)); + threadGroup.join_all(); + + float pfefper = (100.0*(float)nremoved_pfefilter)/(float)pt_lines; + float sigfper = (100.0*(float)nremoved_sigfilter)/(float)pt_lines; + + std::cerr << "\n\n------------------------------------------------------\n" + << " unfiltered phrases pairs: " << pt_lines << "\n" + << "\n" + << " P(f|e) filter [first]: " << nremoved_pfefilter << " (" << pfefper << "%)\n" + << " significance filter: " << nremoved_sigfilter << " (" << sigfper << "%)\n" + << " TOTAL FILTERED: " << (nremoved_pfefilter + nremoved_sigfilter) << " (" << (sigfper + pfefper) << "%)\n" + << "\n" + << " FILTERED phrase pairs: " << (pt_lines - nremoved_pfefilter - nremoved_sigfilter) << " (" << (100.0-sigfper - pfefper) << "%)\n" + << "------------------------------------------------------\n"; +} diff --git a/moses/TranslationModel/UG/mmsapt.cpp b/moses/TranslationModel/UG/mmsapt.cpp index ed60771ae..024ae44d3 100644 --- a/moses/TranslationModel/UG/mmsapt.cpp +++ b/moses/TranslationModel/UG/mmsapt.cpp @@ -188,7 +188,7 @@ namespace Moses dflt = pair<string,string>("workers","0"); m_workers = 
atoi(param.insert(dflt).first->second.c_str()); - if (m_workers == 0) m_workers = boost::thread::hardware_concurrency(); + if (m_workers == 0) m_workers = StaticData::Instance().ThreadCount(); else m_workers = min(m_workers,size_t(boost::thread::hardware_concurrency())); dflt = pair<string,string>("bias-loglevel","0"); diff --git a/moses/TrellisPath.cpp b/moses/TrellisPath.cpp index 1f09b2eed..012b9a7af 100644 --- a/moses/TrellisPath.cpp +++ b/moses/TrellisPath.cpp @@ -42,7 +42,7 @@ TrellisPath::TrellisPath(const Hypothesis *hypo) void TrellisPath::InitTotalScore() { - m_totalScore = m_path[0]->GetWinningHypo()->GetFutureScore(); + m_totalScore = m_path[0]->GetWinningHypo()->GetFutureScore(); //calc score size_t sizePath = m_path.size(); @@ -50,7 +50,7 @@ void TrellisPath::InitTotalScore() const Hypothesis *hypo = m_path[pos]; const Hypothesis *winningHypo = hypo->GetWinningHypo(); if (hypo != winningHypo) { - m_totalScore = m_totalScore - winningHypo->GetFutureScore() + hypo->GetFutureScore(); + m_totalScore += hypo->GetFutureScore() - winningHypo->GetFutureScore(); } } } @@ -169,9 +169,6 @@ TrellisPath:: GetScoreBreakdown() const { if (!m_scoreBreakdown) { - float totalScore = m_path[0]->GetWinningHypo()->GetFutureScore(); - // calculated for sanity check only - m_scoreBreakdown.reset(new ScoreComponentCollection()); m_scoreBreakdown->PlusEquals(m_path[0]->GetWinningHypo()->GetScoreBreakdown()); @@ -184,13 +181,10 @@ GetScoreBreakdown() const const Hypothesis *hypo = m_path[pos]; const Hypothesis *winningHypo = hypo->GetWinningHypo(); if (hypo != winningHypo) { - totalScore += hypo->GetFutureScore() - winningHypo->GetFutureScore(); m_scoreBreakdown->MinusEquals(winningHypo->GetScoreBreakdown()); m_scoreBreakdown->PlusEquals(hypo->GetScoreBreakdown()); } } - - assert(totalScore == m_totalScore); } return m_scoreBreakdown; diff --git a/run-regtests.sh b/run-regtests.sh index 3d93741d5..843ee3a94 100755 --- a/run-regtests.sh +++ b/run-regtests.sh @@ -53,18 +53,18 @@ 
git submodule update regtest # -- compile from scratch with server, run regtests set -x if [ "$full" == true ] ; then - ./bjam -j$j --with-irstlm=$irstlm --with-boost=$boost --with-cmph=$cmph --no-xmlrpc-c --with-regtest=$regtest -a $skipcompact $@ $q || exit $? + ./bjam -j$j --with-mm --with-mm-extras --with-irstlm=$irstlm --with-boost=$boost --with-cmph=$cmph --no-xmlrpc-c --with-regtest=$regtest -a $skipcompact $@ $q || exit $? if ./regression-testing/run-single-test.perl --server --startuptest ; then - ./bjam -j$j --with-irstlm=$irstlm --with-boost=$boost --with-cmph=$cmph $xmlrpc --with-regtest=$regtest -a $skipcompact $@ $q + ./bjam -j$j --with-mm --with-mm-extras --with-irstlm=$irstlm --with-boost=$boost --with-cmph=$cmph $xmlrpc --with-regtest=$regtest -a $skipcompact $@ $q fi else # when investigating failures, always run single-threaded if [ "$q" == "-q" ] ; then j=1; fi if ./regression-testing/run-single-test.perl --server --startuptest ; then - ./bjam -j$j $q $a --with-irstlm=$irstlm --with-boost=$boost --with-cmph=$cmph $xmlrpc --with-regtest=$regtest $skipcompact $@ + ./bjam -j$j --with-mm $q $a --with-irstlm=$irstlm --with-boost=$boost --with-cmph=$cmph $xmlrpc --with-regtest=$regtest $skipcompact $@ else - ./bjam -j$j $q $a --with-irstlm=$irstlm --with-boost=$boost --with-cmph=$cmph --no-xmlrpc-c --with-regtest=$regtest $skipcompact $@ + ./bjam -j$j --with-mm --with-mm-extras $q $a --with-irstlm=$irstlm --with-boost=$boost --with-cmph=$cmph --no-xmlrpc-c --with-regtest=$regtest $skipcompact $@ fi fi diff --git a/util/file_stream.hh b/util/file_stream.hh index ae9ad5aa7..be26a0921 100644 --- a/util/file_stream.hh +++ b/util/file_stream.hh @@ -58,6 +58,7 @@ class FileStream : public FakeOStream<FileStream> { } FileStream &seekp(uint64_t to) { + flush(); util::SeekOrThrow(fd_, to); return *this; } |