Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTetsuo Kiso <tetsuo-s@is.naist.jp>2012-03-19 18:21:02 +0400
committerTetsuo Kiso <tetsuo-s@is.naist.jp>2012-03-19 18:21:02 +0400
commit2b28072f7a9e651a0e87fb84ec5c8956e2bbed47 (patch)
tree32ba4ea5b0f6d4f8fd260a6489d549a7ad20a28a /mert/Ngram.h
parentf686e8771a5db09e32474ed0735dbdef275158d3 (diff)
Move Encoder class from Scorer.h to Ngram.h.
To add unit tests.
Diffstat (limited to 'mert/Ngram.h')
-rw-r--r--mert/Ngram.h78
1 files changed, 67 insertions, 11 deletions
diff --git a/mert/Ngram.h b/mert/Ngram.h
index 493ef5de8..d2c5f3932 100644
--- a/mert/Ngram.h
+++ b/mert/Ngram.h
@@ -3,6 +3,62 @@
#include <vector>
#include <map>
+#include <string>
+
+/**
+ * A map from token strings to integer ids, used to manage vocabularies.
+ */
+class Encoder {
+ public:
+ typedef std::map<std::string, int>::iterator iterator;
+ typedef std::map<std::string, int>::const_iterator const_iterator;
+
+ Encoder() {}
+ virtual ~Encoder() {}
+
+ /** Returns the id assigned to the given "token"; an unseen token is added with an id equal to the current vocabulary size. */
+ int Encode(const std::string& token) {
+ iterator it = m_vocab.find(token);
+ int encoded_token;
+ if (it == m_vocab.end()) {
+ // Add a new entry to the vocabulary.
+ encoded_token = static_cast<int>(m_vocab.size());
+ m_vocab[token] = encoded_token;
+ } else {
+ encoded_token = it->second;
+ }
+ return encoded_token;
+ }
+
+ /**
+ * Return true iff the specified "str" is found in the container; on success its id is stored in "*v".
+ */
+ bool Lookup(const std::string&str , int* v) const {
+ const_iterator it = m_vocab.find(str);
+ if (it == m_vocab.end()) return false;  // "*v" is left untouched when "str" is absent.
+ *v = it->second;
+ return true;
+ }
+
+ void clear() { m_vocab.clear(); }
+
+ bool empty() const { return m_vocab.empty(); }
+
+ size_t size() const { return m_vocab.size(); }
+
+ iterator find(const std::string& str) { return m_vocab.find(str); }
+ const_iterator find(const std::string& str) const { return m_vocab.find(str); }
+
+ int& operator[](const std::string& str) { return m_vocab[str]; }  // Forwards to std::map::operator[], which inserts a value-initialized entry if "str" is absent.
+
+ iterator begin() { return m_vocab.begin(); }
+ const_iterator begin() const { return m_vocab.begin(); }
+ iterator end() { return m_vocab.end(); }
+ const_iterator end() const { return m_vocab.end(); }
+
+ private:
+ std::map<std::string, int> m_vocab;  // Token string -> integer id.
+};
/** A simple STL-std::map based n-gram counts. Basically, we provide
* typical accessors and mutators, but we intentionally do not allow
@@ -40,7 +96,7 @@ class NgramCounts {
/**
* If the specified "ngram" is found, we add counts.
* If not, we insert the default count in the container. */
- void add(const Key& ngram) {
+ void Add(const Key& ngram) {
const_iterator it = find(ngram);
if (it != end()) {
m_counts[ngram] = it->second + 1;
@@ -50,6 +106,16 @@ class NgramCounts {
}
/**
+ * Return true iff the specified "ngram" is found in the container; on success its stored value is written to "*v".
+ */
+ bool Lookup(const Key& ngram, Value* v) const {
+ const_iterator it = m_counts.find(ngram);
+ if (it == m_counts.end()) return false;  // "*v" is left untouched when "ngram" is absent.
+ *v = it->second;
+ return true;
+ }
+
+ /**
* Clear all elements in the container.
*/
void clear() { m_counts.clear(); }
@@ -69,16 +135,6 @@ class NgramCounts {
// Note: This is mainly used by unit tests.
int get_default_count() const { return kDefaultCount; }
- /**
- * Return true iff the specified "ngram" is found in the container.
- */
- bool lookup(const Key& ngram, Value* v) const {
- const_iterator it = m_counts.find(ngram);
- if (it == m_counts.end()) return false;
- *v = it->second;
- return true;
- }
-
iterator find(const Key& ngram) { return m_counts.find(ngram); }
const_iterator find(const Key& ngram) const { return m_counts.find(ngram); }