Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTetsuo Kiso <tetsuo-s@is.naist.jp>2012-03-14 17:14:11 +0400
committerTetsuo Kiso <tetsuo-s@is.naist.jp>2012-03-14 17:14:11 +0400
commitfba01c7cdfb10f8283cff44d78b40abb7ad61cc0 (patch)
treed7ef43ce9d5ce747c2afce705ede3dadc63ed9a3 /mert/Ngram.h
parent175b7aaf495963a8dd08525094073db06686adf8 (diff)
Create a header file for NgramCounts class.
The reason is that we want to add the unit test.
Diffstat (limited to 'mert/Ngram.h')
-rw-r--r--mert/Ngram.h97
1 files changed, 97 insertions, 0 deletions
diff --git a/mert/Ngram.h b/mert/Ngram.h
new file mode 100644
index 000000000..493ef5de8
--- /dev/null
+++ b/mert/Ngram.h
@@ -0,0 +1,97 @@
+#ifndef MERT_NGRAM_H_
+#define MERT_NGRAM_H_
+
+#include <vector>
+#include <map>
+
/**
 * A simple std::map-based container of n-gram counts.
 *
 * An n-gram (Key) is a sequence of integer ids and is mapped to an
 * integer count (Value). Typical accessors and mutators are provided,
 * but erasing individual elements is intentionally not supported;
 * clear() is the only way to remove entries.
 */
class NgramCounts {
 public:
  /**
   * Ordering used by the n-gram map: keys are compared element by
   * element, and when one key is a prefix of the other the shorter
   * key sorts first.
   */
  struct NgramComparator {
    bool operator()(const std::vector<int>& a,
                    const std::vector<int>& b) const {
      // std::vector's operator< is this very lexicographic order
      // (first differing element decides, otherwise the shorter wins).
      return a < b;
    }
  };

  typedef std::vector<int> Key;
  typedef int Value;
  typedef std::map<Key, Value, NgramComparator>::iterator iterator;
  typedef std::map<Key, Value, NgramComparator>::const_iterator const_iterator;

  /** Create an empty container whose default count is 1. */
  NgramCounts() : kDefaultCount(1) { }
  virtual ~NgramCounts() { }

  /**
   * Count one occurrence of "ngram".
   * If "ngram" is already stored, its count is incremented by one.
   * Otherwise it is inserted with the default count.
   */
  void add(const Key& ngram) {
    // One tree descent serves both cases: lower_bound() yields either
    // the existing entry or the position where the new one belongs.
    iterator pos = m_counts.lower_bound(ngram);
    const bool found =
        pos != m_counts.end() && !m_counts.key_comp()(ngram, pos->first);
    if (found) {
      ++pos->second;
    } else {
      // "pos" is passed as an insertion hint so the map does not have
      // to search for the position again.
      m_counts.insert(
          pos,
          std::map<Key, Value, NgramComparator>::value_type(ngram,
                                                            kDefaultCount));
    }
  }

  /**
   * Clear all elements in the container.
   */
  void clear() { m_counts.clear(); }

  /**
   * Return true iff the container is empty.
   */
  bool empty() const { return m_counts.empty(); }

  /**
   * Return the number of distinct n-grams in the container.
   */
  size_t size() const { return m_counts.size(); }

  /**
   * Return the max_size() of the underlying std::map.
   */
  size_t max_size() const { return m_counts.max_size(); }

  /**
   * Return the count given to an n-gram the first time it is add()ed.
   * Note: This is mainly used by unit tests.
   */
  int get_default_count() const { return kDefaultCount; }

  /**
   * Return true iff the specified "ngram" is found in the container.
   * On success the stored count is written to "*v". "v" may be null,
   * in which case the call is a pure membership test. When the n-gram
   * is not found, "*v" is left untouched.
   */
  bool lookup(const Key& ngram, Value* v) const {
    const_iterator it = m_counts.find(ngram);
    if (it == m_counts.end()) return false;
    if (v) *v = it->second;
    return true;
  }

  /**
   * Return an iterator to the entry for "ngram", or end() if absent.
   */
  iterator find(const Key& ngram) { return m_counts.find(ngram); }
  const_iterator find(const Key& ngram) const { return m_counts.find(ngram); }

  /**
   * Return a reference to the count of "ngram". As with
   * std::map::operator[], a missing n-gram is inserted first, with a
   * value-initialized (zero) count -- not the default count.
   */
  Value& operator[](const Key& ngram) { return m_counts[ngram]; }

  // Iteration over (n-gram, count) pairs in NgramComparator order.
  iterator begin() { return m_counts.begin(); }
  const_iterator begin() const { return m_counts.begin(); }
  iterator end() { return m_counts.end(); }
  const_iterator end() const { return m_counts.end(); }

 private:
  const int kDefaultCount;  // count assigned by add() to a new n-gram
  std::map<Key, Value, NgramComparator> m_counts;
};
+
+#endif // MERT_NGRAM_H_