defer BackwardTest

author: Hieu Hoang <hieu@hoang.co.uk> 2013-09-30 20:39:09 +0400
committer: Hieu Hoang <hieu@hoang.co.uk> 2013-09-30 20:39:09 +0400
commit: 5472311e5f88fa590cdc0e400aaa0cefb49abd90 (patch)
tree: 05b2d6c2565652cd44fa9df636e2674905c22122 /defer
parent: 6bce68851762045f40c862402abca98b51f8c3be (diff)
1 files changed, 378 insertions, 0 deletions
diff --git a/defer/BackwardTest.cpp b/defer/BackwardTest.cpp
new file mode 100644
index 000000000..7fed72270
--- /dev/null
+++ b/defer/BackwardTest.cpp
@@ -0,0 +1,378 @@
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2010 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+#define BOOST_TEST_MODULE BackwardTest
+#include <boost/test/unit_test.hpp>
+
+#include "lm/config.hh"
+#include "lm/left.hh"
+#include "lm/model.hh"
+#include "lm/state.hh"
+
+#include "moses/Sentence.h"
+#include "moses/TypeDef.h"
+
+#include "moses/StaticData.h"
+
+//#include "BackwardLMState.h"
+#include "moses/LM/Backward.h"
+#include "moses/LM/BackwardLMState.h"
+#include "moses/Util.h"
+
+#include "lm/state.hh"
+#include "lm/left.hh"
+
+#include <vector>
+
+using namespace Moses;
+//using namespace std;
+/*
+template <class M> void Foo() {
+
+
+  Moses::BackwardLanguageModel<M> *backwardLM;
+  // = new Moses::BackwardLanguageModel<M>( filename, factorType, lazy );
+
+
+}
+template <class M> void Everything() {
+  //  Foo<M>();
+}
+*/
+
+namespace Moses
+{
+
+// BOOST_CHECK_CLOSE with all three arguments cast to double, because some Boost versions insist on matching types; tol is a percentage. There is no trailing ';' so each use expands to a single statement.
+#define SLOPPY_CHECK_CLOSE(ref, value, tol) BOOST_CHECK_CLOSE(static_cast<double>(ref), static_cast<double>(value), static_cast<double>(tol))
+
+// Test fixture for BackwardLanguageModel<lm::ngram::ProbingModel>: it builds the model from the
+// ARPA file named by the first command-line argument and checks the model's state and scoring calls.
+class BackwardLanguageModelTest
+{
+public:
+  BackwardLanguageModelTest() :
+    dummyInput(new Sentence()),
+    backwardLM(
+      static_cast< BackwardLanguageModel<lm::ngram::ProbingModel> * >(
+        ConstructBackwardLM("LM1=1.0", ArpaPath(), 0, false))) {
+  }
+
+  ~BackwardLanguageModelTest() {
+    delete dummyInput;
+    delete backwardLM;
+  }
+
+  // EmptyHypothesisState() must hand back a non-NULL state for the dummy sentence.
+  void testEmptyHypothesis() {
+    FFState *ffState = const_cast< FFState * >(backwardLM->EmptyHypothesisState( *dummyInput ));
+
+    BOOST_CHECK( ffState != NULL );
+    delete ffState;
+  }
+
+  // Scores the prefixes "the", "the licenses", "the licenses for" and "the licenses for most"
+  // with CalcScore() and checks fullScore, ngramScore and oovCount against the hard-coded
+  // scores below (presumably taken from the test ARPA file -- TODO confirm).
+  void testCalcScore() {
+    double p_the      = -1.383059;
+    double p_licenses = -2.360783;
+    double p_for      = -1.661813;
+    double p_most     = -2.360783;
+
+    double p_the_licenses  = -0.9625873;
+    double p_licenses_for  = -1.661557;
+    double p_for_most      = -0.4526253;
+
+    double p_the_licenses_for  = p_the_licenses + p_licenses_for;
+    // Kept for reference but not used yet: p_software = -1.62042, p_most_software = -1.70295,
+    // p_licenses_for_most = p_licenses_for + p_for_most.
+
+    // the
+    {
+      Phrase phrase;
+      BOOST_CHECK( phrase.GetSize() == 0 );
+
+      std::vector<FactorType> outputFactorOrder;
+      outputFactorOrder.push_back(0);
+
+      phrase.CreateFromString(
+        Input,
+        outputFactorOrder,
+        "the",
+        StaticData::Instance().GetFactorDelimiter(),
+        NULL);
+
+      BOOST_CHECK( phrase.GetSize() == 1 );
+
+      float fullScore;
+      float ngramScore;
+      size_t oovCount;
+      backwardLM->CalcScore(phrase, fullScore, ngramScore, oovCount);
+
+      BOOST_CHECK( oovCount == 0 );
+      SLOPPY_CHECK_CLOSE( TransformLMScore(p_the), fullScore, 0.01);
+      SLOPPY_CHECK_CLOSE( TransformLMScore( 0.0 ), ngramScore, 0.01);
+    }
+
+    // the licenses
+    {
+      Phrase phrase;
+      BOOST_CHECK( phrase.GetSize() == 0 );
+
+      std::vector<FactorType> outputFactorOrder;
+      outputFactorOrder.push_back(0);
+
+      phrase.CreateFromString(
+        Input,
+        outputFactorOrder,
+        "the licenses",
+        StaticData::Instance().GetFactorDelimiter(),
+        NULL);
+
+      BOOST_CHECK( phrase.GetSize() == 2 );
+
+      float fullScore;
+      float ngramScore;
+      size_t oovCount;
+      backwardLM->CalcScore(phrase, fullScore, ngramScore, oovCount);
+
+      BOOST_CHECK( oovCount == 0 );
+      SLOPPY_CHECK_CLOSE( TransformLMScore(p_licenses + p_the_licenses), fullScore, 0.01);
+      SLOPPY_CHECK_CLOSE( TransformLMScore( 0.0 ), ngramScore, 0.01);
+    }
+
+    // the licenses for
+    {
+      Phrase phrase;
+      BOOST_CHECK( phrase.GetSize() == 0 );
+
+      std::vector<FactorType> outputFactorOrder;
+      outputFactorOrder.push_back(0);
+
+      phrase.CreateFromString(
+        Input,
+        outputFactorOrder,
+        "the licenses for",
+        StaticData::Instance().GetFactorDelimiter(),
+        NULL);
+
+      BOOST_CHECK( phrase.GetSize() == 3 );
+
+      float fullScore;
+      float ngramScore;
+      size_t oovCount;
+      backwardLM->CalcScore(phrase, fullScore, ngramScore, oovCount);
+
+      BOOST_CHECK( oovCount == 0 );
+      SLOPPY_CHECK_CLOSE( TransformLMScore( p_the_licenses_for ), ngramScore, 0.01);
+      SLOPPY_CHECK_CLOSE( TransformLMScore(p_for + p_licenses_for + p_the_licenses), fullScore, 0.01);
+    }
+
+    // the licenses for most
+    {
+      Phrase phrase;
+      BOOST_CHECK( phrase.GetSize() == 0 );
+
+      std::vector<FactorType> outputFactorOrder;
+      outputFactorOrder.push_back(0);
+
+      phrase.CreateFromString(
+        Input,
+        outputFactorOrder,
+        "the licenses for most",
+        StaticData::Instance().GetFactorDelimiter(),
+        NULL);
+
+      BOOST_CHECK( phrase.GetSize() == 4 );
+
+      float fullScore;
+      float ngramScore;
+      size_t oovCount;
+      backwardLM->CalcScore(phrase, fullScore, ngramScore, oovCount);
+
+      BOOST_CHECK( oovCount == 0 );
+      SLOPPY_CHECK_CLOSE( TransformLMScore( p_the_licenses + p_licenses_for ), ngramScore, 0.01);
+      SLOPPY_CHECK_CLOSE( TransformLMScore(p_most + p_for_most + p_licenses_for + p_the_licenses), fullScore, 0.01);
+    }
+  }
+
+  // Passes "the", "licenses", "for" and "most" to Evaluate() one word at a time; the state
+  // returned by each call becomes the input of the next and the state it replaces is deleted.
+  // NOTE(review): unlike testCalcScore(), the expected values here are not wrapped in
+  // TransformLMScore() -- confirm that Evaluate() reports untransformed scores.
+  void testEvaluate() {
+    FFState *nextState;
+    FFState *prevState = const_cast< FFState * >(backwardLM->EmptyHypothesisState( *dummyInput ));
+
+    double p_most     = -2.360783;
+    double p_for      = -1.661813;
+    double p_licenses = -2.360783;
+    double p_the      = -1.383059;
+    double p_eos      = -1.457693;
+
+    double p_most_for      = -0.4526253;
+    double p_for_licenses  = -1.661557;
+    double p_licenses_the = -0.9625873;
+    double p_the_eos = -1.940311;
+
+    // the
+    {
+      Phrase phrase;
+      BOOST_CHECK( phrase.GetSize() == 0 );
+
+      std::vector<FactorType> outputFactorOrder;
+      outputFactorOrder.push_back(0);
+
+      phrase.CreateFromString(
+        Input,
+        outputFactorOrder,
+        "the",
+        StaticData::Instance().GetFactorDelimiter(),
+        NULL);
+
+      BOOST_CHECK( phrase.GetSize() == 1 );
+
+      float score;
+      nextState = backwardLM->Evaluate(phrase, prevState, score);
+
+      // p(the) * p(</s> | the) / p(</s>)
+      SLOPPY_CHECK_CLOSE( (p_the + p_the_eos - p_eos), score, 0.01);
+
+      delete prevState;
+      prevState = nextState;
+    }
+
+    // the licenses  (word added by this step: "licenses")
+    {
+      Phrase phrase;
+      BOOST_CHECK( phrase.GetSize() == 0 );
+
+      std::vector<FactorType> outputFactorOrder;
+      outputFactorOrder.push_back(0);
+
+      phrase.CreateFromString(
+        Input,
+        outputFactorOrder,
+        "licenses",
+        StaticData::Instance().GetFactorDelimiter(),
+        NULL);
+
+      BOOST_CHECK( phrase.GetSize() == 1 );
+
+      float score;
+      nextState = backwardLM->Evaluate(phrase, prevState, score);
+
+      // p(licenses) * p(licenses | the) / p(the)
+      SLOPPY_CHECK_CLOSE( (p_licenses + p_licenses_the - p_the), score, 0.01);
+
+      delete prevState;
+      prevState = nextState;
+    }
+
+    // the licenses for  (word added by this step: "for")
+    {
+      Phrase phrase;
+      BOOST_CHECK( phrase.GetSize() == 0 );
+
+      std::vector<FactorType> outputFactorOrder;
+      outputFactorOrder.push_back(0);
+
+      phrase.CreateFromString(
+        Input,
+        outputFactorOrder,
+        "for",
+        StaticData::Instance().GetFactorDelimiter(),
+        NULL);
+
+      BOOST_CHECK( phrase.GetSize() == 1 );
+
+      float score;
+      nextState = backwardLM->Evaluate(phrase, prevState, score);
+
+      // p(for) * p(for | licenses) / p(licenses)
+      SLOPPY_CHECK_CLOSE( (p_for + p_for_licenses - p_licenses), score, 0.01);
+
+      delete prevState;
+      prevState = nextState;
+    }
+
+    // the licenses for most  (word added by this step: "most")
+    {
+      Phrase phrase;
+      BOOST_CHECK( phrase.GetSize() == 0 );
+
+      std::vector<FactorType> outputFactorOrder;
+      outputFactorOrder.push_back(0);
+
+      phrase.CreateFromString(
+        Input,
+        outputFactorOrder,
+        "most",
+        StaticData::Instance().GetFactorDelimiter(),
+        NULL);
+
+      BOOST_CHECK( phrase.GetSize() == 1 );
+
+      float score;
+      nextState = backwardLM->Evaluate(phrase, prevState, score);
+
+      // p(most) * p(most | for) / p(for)
+      SLOPPY_CHECK_CLOSE( (p_most + p_most_for - p_for), score, 0.01);
+
+      delete prevState;
+      prevState = nextState;
+    }
+
+    delete prevState;
+  }
+
+private:
+  // Returns argv[1] (the ARPA file path), failing the test first if no argument was supplied.
+  // Same check as FileLocation(), which is declared after this class and so is not visible here.
+  static const char *ArpaPath() {
+    if (boost::unit_test::framework::master_test_suite().argc < 2) {
+      BOOST_FAIL("Jamfile must specify arpa file for this test, but did not");
+    }
+    return boost::unit_test::framework::master_test_suite().argv[1];
+  }
+
+  const Sentence *dummyInput;
+  BackwardLanguageModel<lm::ngram::ProbingModel> *backwardLM;
+};
+
+
+}
+
+const char *FileLocation()
+{
+  // argv[1] is expected to name the ARPA file; report a fatal test failure when it is absent.
+  const boost::unit_test::master_test_suite_t &suite = boost::unit_test::framework::master_test_suite();
+  if (suite.argc < 2) { BOOST_FAIL("Jamfile must specify arpa file for this test, but did not"); }
+  return suite.argv[1];
+}
+
+BOOST_AUTO_TEST_CASE(ProbingAll)
+{
+  // One fixture object is built here and reused by the three checks, which run in this order.
+  BackwardLanguageModelTest probingFixture;
+
+  probingFixture.testEmptyHypothesis();
+  probingFixture.testCalcScore();
+  probingFixture.testEvaluate();
+}
author	Hieu Hoang <hieu@hoang.co.uk>	2013-09-30 20:39:09 +0400
committer	Hieu Hoang <hieu@hoang.co.uk>	2013-09-30 20:39:09 +0400
commit	5472311e5f88fa590cdc0e400aaa0cefb49abd90 (patch)
tree	05b2d6c2565652cd44fa9df636e2674905c22122 /defer
parent	6bce68851762045f40c862402abca98b51f8c3be (diff)