Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTetsuo Kiso <tetsuo-s@is.naist.jp>2012-03-20 00:52:35 +0400
committerTetsuo Kiso <tetsuo-s@is.naist.jp>2012-03-20 00:52:35 +0400
commit2fdb47fe67a0bcca8cebdb3f86c2f2a5d7fb2055 (patch)
tree32caf8c3a66f6f99cd1e07cf1740b1e13205ffb0 /mert/BleuScorerTest.cpp
parent8987fed667c8c7cedfb714b28a061fb7b8030d6c (diff)
Add a unit test for BleuScorer.
The test covers only n-gram counting. More tests for calculating the BLEU score are required.
Diffstat (limited to 'mert/BleuScorerTest.cpp')
-rw-r--r--mert/BleuScorerTest.cpp155
1 files changed, 155 insertions, 0 deletions
diff --git a/mert/BleuScorerTest.cpp b/mert/BleuScorerTest.cpp
new file mode 100644
index 000000000..2f5c8b557
--- /dev/null
+++ b/mert/BleuScorerTest.cpp
@@ -0,0 +1,155 @@
+#include "BleuScorer.h"
+
+#define BOOST_TEST_MODULE MertBleuScorer
+#include <boost/test/unit_test.hpp>
+
+#include "Ngram.h"
+#include "Vocabulary.h"
+#include "Util.h"
+
+namespace {
+
+NgramCounts* g_counts = NULL;  // Table returned by GetNgramCounts(); NULL until SetNgramCounts() stores one.
+
+// Returns the n-gram table previously stored with SetNgramCounts().
+//
+// The precondition is enforced with BOOST_REQUIRE_MESSAGE instead of
+// assert(): assert() is compiled out when NDEBUG is defined, which would let
+// callers dereference a NULL pointer, and when active it aborts the whole
+// process. BOOST_REQUIRE_MESSAGE stops only the running test case and reports
+// the failure through the test framework.
+NgramCounts* GetNgramCounts() {
+  BOOST_REQUIRE_MESSAGE(
+      g_counts != NULL,
+      "SetNgramCounts() must be called before GetNgramCounts()");
+  return g_counts;
+}
+
+// Stores |counts| in the file-local g_counts pointer so that GetNgramCounts()
+// hands it back. Only the pointer is kept; no copy of the table is made.
+void SetNgramCounts(NgramCounts* counts) {
+  g_counts = counts;
+}
+
+// Lookup key for a one-word n-gram.
+//
+// The word is passed through Encode() on the vocabulary returned by
+// mert::VocabularyFactory::GetVocabulary() and the result is appended to
+// |instance|.
+struct Unigram {
+  // explicit: a plain std::string must not silently convert to a Unigram.
+  explicit Unigram(const std::string& a) {
+    instance.push_back(mert::VocabularyFactory::GetVocabulary()->Encode(a));
+  }
+  NgramCounts::Key instance;
+};
+
+// Lookup key for a two-word n-gram: the Encode() results for |a| and |b|,
+// in that order, taken from mert::VocabularyFactory::GetVocabulary().
+struct Bigram {
+  NgramCounts::Key instance;
+
+  Bigram(const std::string& a, const std::string& b) {
+    const std::string* const words[] = { &a, &b };
+    for (std::size_t i = 0; i < sizeof(words) / sizeof(words[0]); ++i) {
+      instance.push_back(
+          mert::VocabularyFactory::GetVocabulary()->Encode(*words[i]));
+    }
+  }
+};
+
+// Lookup key for a three-word n-gram: the Encode() results for |a|, |b| and
+// |c|, in that order, taken from mert::VocabularyFactory::GetVocabulary().
+struct Trigram {
+  NgramCounts::Key instance;
+
+  Trigram(const std::string& a, const std::string& b, const std::string& c) {
+    const std::string* const words[] = { &a, &b, &c };
+    for (std::size_t i = 0; i < sizeof(words) / sizeof(words[0]); ++i) {
+      instance.push_back(
+          mert::VocabularyFactory::GetVocabulary()->Encode(*words[i]));
+    }
+  }
+};
+
+// Lookup key for a four-word n-gram: the Encode() results for |a|, |b|, |c|
+// and |d|, in that order, taken from mert::VocabularyFactory::GetVocabulary().
+struct Fourgram {
+  NgramCounts::Key instance;
+
+  Fourgram(const std::string& a, const std::string& b,
+           const std::string& c, const std::string& d) {
+    const std::string* const words[] = { &a, &b, &c, &d };
+    for (std::size_t i = 0; i < sizeof(words) / sizeof(words[0]); ++i) {
+      instance.push_back(
+          mert::VocabularyFactory::GetVocabulary()->Encode(*words[i]));
+    }
+  }
+};
+
+// Checks that the table returned by GetNgramCounts() contains the unigram
+// |str|. A miss is recorded as a non-fatal Boost.Test failure whose message
+// names the unigram, so the report shows which n-gram was missing rather
+// than only this helper's source line.
+void CheckUnigram(const std::string& str) {
+  Unigram unigram(str);
+  NgramCounts::Value value;  // Out-parameter required by Lookup(); not inspected.
+  BOOST_CHECK_MESSAGE(GetNgramCounts()->Lookup(unigram.instance, &value),
+                      "unigram not found: \"" << str << "\"");
+}
+
+// Checks that the table returned by GetNgramCounts() contains the bigram
+// "|a| |b|". A miss is recorded as a non-fatal Boost.Test failure whose
+// message names the bigram, so the report shows which n-gram was missing
+// rather than only this helper's source line.
+void CheckBigram(const std::string& a, const std::string& b) {
+  Bigram bigram(a, b);
+  NgramCounts::Value value;  // Out-parameter required by Lookup(); not inspected.
+  BOOST_CHECK_MESSAGE(GetNgramCounts()->Lookup(bigram.instance, &value),
+                      "bigram not found: \"" << a << " " << b << "\"");
+}
+
+// Checks that the table returned by GetNgramCounts() contains the trigram
+// "|a| |b| |c|". A miss is recorded as a non-fatal Boost.Test failure whose
+// message names the trigram, so the report shows which n-gram was missing
+// rather than only this helper's source line.
+void CheckTrigram(const std::string& a, const std::string& b,
+                  const std::string& c) {
+  Trigram trigram(a, b, c);
+  NgramCounts::Value value;  // Out-parameter required by Lookup(); not inspected.
+  BOOST_CHECK_MESSAGE(GetNgramCounts()->Lookup(trigram.instance, &value),
+                      "trigram not found: \"" << a << " " << b << " " << c
+                                              << "\"");
+}
+
+// Checks that the table returned by GetNgramCounts() contains the 4-gram
+// "|a| |b| |c| |d|". A miss is recorded as a non-fatal Boost.Test failure
+// whose message names the 4-gram, so the report shows which n-gram was
+// missing rather than only this helper's source line.
+void CheckFourgram(const std::string& a, const std::string& b,
+                   const std::string& c, const std::string& d) {
+  Fourgram fourgram(a, b, c, d);
+  NgramCounts::Value value;  // Out-parameter required by Lookup(); not inspected.
+  BOOST_CHECK_MESSAGE(GetNgramCounts()->Lookup(fourgram.instance, &value),
+                      "4-gram not found: \"" << a << " " << b << " " << c
+                                             << " " << d << "\"");
+}
+
+} // namespace
+
+// Verifies the default reference-length type and that every type set through
+// SetReferenceLengthType() is reported back by GetReferenceLengthType().
+BOOST_AUTO_TEST_CASE(bleu_reference_type) {
+  BleuScorer scorer;
+  // BleuScorer will use "closest" by default.
+  BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::CLOSEST);
+
+  scorer.SetReferenceLengthType(BleuScorer::AVERAGE);
+  BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::AVERAGE);
+
+  scorer.SetReferenceLengthType(BleuScorer::SHORTEST);
+  BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::SHORTEST);
+
+  // The default check above does not prove the setter accepts CLOSEST, so
+  // switch back to it explicitly from a different value.
+  scorer.SetReferenceLengthType(BleuScorer::CLOSEST);
+  BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::CLOSEST);
+}
+
+// Counts the n-grams of one tokenized sentence with BleuScorer::CountNgrams()
+// and verifies that each expected unigram, bigram, trigram and 4-gram listed
+// below can be looked up in the resulting table.
+BOOST_AUTO_TEST_CASE(bleu_count_ngrams) {
+  BleuScorer scorer;
+
+  std::string line = "I saw a girl with a telescope .";
+  // The string above contains 25 distinct n-grams.
+  //
+  // unigram: "I", "saw", "a", "girl", "with", "telescope", "."
+  // bigram: "I saw", "saw a", "a girl", "girl with", "with a", "a telescope",
+  //         "telescope ."
+  // trigram: "I saw a", "saw a girl", "a girl with", "girl with a",
+  //          "with a telescope", "a telescope ."
+  // 4-gram: "I saw a girl", "saw a girl with", "a girl with a",
+  //         "girl with a telescope", "with a telescope ."
+  NgramCounts counts;
+  // The line has eight space-separated tokens ("a" occurs twice).
+  BOOST_REQUIRE(scorer.CountNgrams(line, counts, kBleuNgramOrder) == 8);
+  BOOST_CHECK_EQUAL(25, counts.size());
+
+  // Seven distinct words are expected in the scorer's vocabulary.
+  mert::Vocabulary* vocab = scorer.GetVocab();
+  BOOST_CHECK_EQUAL(7, vocab->size());
+
+  // Every token of the line must be known to the vocabulary.
+  std::vector<std::string> res;
+  Tokenize(line.c_str(), ' ', &res);
+  std::vector<int> ids(res.size());
+  for (std::size_t i = 0; i < res.size(); ++i) {
+    BOOST_CHECK(vocab->Lookup(res[i], &ids[i]));
+  }
+
+  // Make |counts| visible to the Check* helpers.
+  SetNgramCounts(&counts);
+
+  // unigram
+  for (std::size_t i = 0; i < res.size(); ++i) {
+    CheckUnigram(res[i]);
+  }
+
+  // bigram
+  CheckBigram("I", "saw");
+  CheckBigram("saw", "a");
+  CheckBigram("a", "girl");
+  CheckBigram("girl", "with");
+  CheckBigram("with", "a");
+  CheckBigram("a", "telescope");
+  CheckBigram("telescope", ".");
+
+  // trigram
+  CheckTrigram("I", "saw", "a");
+  CheckTrigram("saw", "a", "girl");
+  CheckTrigram("a", "girl", "with");
+  CheckTrigram("girl", "with", "a");
+  CheckTrigram("with", "a", "telescope");
+  CheckTrigram("a", "telescope", ".");
+
+  // 4-gram
+  CheckFourgram("I", "saw", "a", "girl");
+  CheckFourgram("saw", "a", "girl", "with");
+  CheckFourgram("a", "girl", "with", "a");
+  CheckFourgram("girl", "with", "a", "telescope");
+  CheckFourgram("with", "a", "telescope", ".");
+
+  // |counts| is a local object: clear the file-local pointer so it does not
+  // dangle once this test case returns.
+  SetNgramCounts(NULL);
+}