Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTetsuo Kiso <tetsuo-s@is.naist.jp>2012-03-18 00:58:40 +0400
committerTetsuo Kiso <tetsuo-s@is.naist.jp>2012-03-18 00:58:40 +0400
commit6b95a19eda818fb772767a0037c70a7bbb6c32e5 (patch)
treee1b7d608005bcc33ee00646263583e83d03a53a2 /mert/Reference.h
parent918bcafb808fe3067a4d689607bffb7dbbf0a914 (diff)
Create Reference class to clean up BleuScorer.
- Add a unit test for Reference. - Move functions to calculate the reference length from BleuScorer to Reference.
Diffstat (limited to 'mert/Reference.h')
-rw-r--r--mert/Reference.h78
1 files changed, 78 insertions, 0 deletions
diff --git a/mert/Reference.h b/mert/Reference.h
new file mode 100644
index 000000000..de5a6fecc
--- /dev/null
+++ b/mert/Reference.h
@@ -0,0 +1,78 @@
+#ifndef MERT_REFERENCE_H_
+#define MERT_REFERENCE_H_
+
+#include <algorithm>
+#include <climits>
+#include <iostream>
+#include <vector>
+
+#include "Ngram.h"
+
+// Reference class is a reference translation for an output translation.
+//
+// It stores one length per reference sentence (see push_back()) together
+// with an NgramCounts object that it allocates in its constructor and
+// deletes in its destructor.
+//
+// Copying is disabled: a member-wise copy would leave two objects deleting
+// the same NgramCounts pointer.
+class Reference {
+ public:
+  // Iterator types over the stored reference lengths (m_length).
+  typedef std::vector<size_t>::iterator iterator;
+  typedef std::vector<size_t>::const_iterator const_iterator;
+
+  // Allocates the NgramCounts object; it is released by the destructor.
+  Reference() : m_counts(new NgramCounts) { }
+  ~Reference() { delete m_counts; }
+
+  // Returns the NgramCounts object held by this instance. The pointer
+  // remains owned by this instance; callers must not delete it.
+  NgramCounts* get_counts() { return m_counts; }
+  const NgramCounts* get_counts() const { return m_counts; }
+
+  // Iteration over the stored reference lengths, in insertion order.
+  iterator begin() { return m_length.begin(); }
+  const_iterator begin() const { return m_length.begin(); }
+  iterator end() { return m_length.end(); }
+  const_iterator end() const { return m_length.end(); }
+
+  // Appends the length of one more reference translation.
+  void push_back(size_t len) { m_length.push_back(len); }
+
+  // Returns how many reference lengths have been stored.
+  size_t num_references() const { return m_length.size(); }
+
+  // Mean of the stored lengths, truncated to an int.
+  int CalcAverage() const;
+  // Stored length closest to |length|; ties go to the shorter reference.
+  int CalcClosest(size_t length) const;
+  // Smallest stored length.
+  int CalcShortest() const;
+
+ private:
+  // Declared but intentionally left undefined so that accidental copies
+  // fail to compile (or link) instead of double-deleting m_counts.
+  Reference(const Reference&);
+  Reference& operator=(const Reference&);
+
+  // Owned; allocated in the constructor and deleted in the destructor.
+  NgramCounts* m_counts;
+
+  // multiple reference lengths
+  std::vector<size_t> m_length;
+};
+
+// Returns the mean of the stored reference lengths, truncated toward zero.
+// Returns 0 when no reference length has been stored, rather than dividing
+// by zero and converting the resulting NaN to int.
+inline int Reference::CalcAverage() const {
+  if (m_length.empty()) {
+    return 0;
+  }
+  // Accumulate in size_t (the element type) so that no narrowing happens on
+  // each addition.
+  size_t total = 0;
+  for (const_iterator it = m_length.begin(); it != m_length.end(); ++it) {
+    total += *it;
+  }
+  // Integer division truncates just like the previous float-then-cast
+  // computation did, but without float rounding for large totals.
+  return static_cast<int>(total / m_length.size());
+}
+
+// Returns the stored reference length whose absolute difference from
+// |length| is smallest. If two references are equally close, the shorter
+// one is returned. Returns 0 when no reference length has been stored.
+inline int Reference::CalcClosest(size_t length) const {
+  if (m_length.empty()) {
+    return 0;
+  }
+  // Work in int throughout so the differences are plain signed arithmetic.
+  const int target = static_cast<int>(length);
+  int best_length = static_cast<int>(m_length[0]);
+  int best_diff = (best_length > target) ? best_length - target
+                                         : target - best_length;
+  for (size_t i = 1; i < m_length.size(); ++i) {
+    const int ref_length = static_cast<int>(m_length[i]);
+    const int diff = (ref_length > target) ? ref_length - target
+                                           : target - ref_length;
+    // Prefer a strictly closer reference; on a tie, prefer the shorter one.
+    if (diff < best_diff || (diff == best_diff && ref_length < best_length)) {
+      best_length = ref_length;
+      best_diff = diff;
+    }
+  }
+  return best_length;
+}
+
+// Returns the smallest stored reference length, or 0 when no reference
+// length has been stored (std::min_element returns end() for an empty range,
+// which must not be dereferenced).
+inline int Reference::CalcShortest() const {
+  if (m_length.empty()) {
+    return 0;
+  }
+  return static_cast<int>(
+      *std::min_element(m_length.begin(), m_length.end()));
+}
+
+#endif // MERT_REFERENCE_H_