diff options
author | graehl <graehl@gmail.com> | 2015-06-25 09:37:11 +0300 |
---|---|---|
committer | graehl <graehl@gmail.com> | 2015-06-25 09:37:11 +0300 |
commit | 589a79fa094a1d36dfa110d2bbb58be078c7a05b (patch) | |
tree | ebe6b43a039b29918ea1c3833c96ee77e5695544 | |
parent | 37e397f526fc207dea498356e890ad085a733ae8 (diff) |
more tab->space
-rw-r--r-- | src/vocabulary.h | 122 |
1 files changed, 61 insertions, 61 deletions
diff --git a/src/vocabulary.h b/src/vocabulary.h index a987522..fe08d86 100644 --- a/src/vocabulary.h +++ b/src/vocabulary.h @@ -16,80 +16,80 @@ struct compare_second }; class vocabulary { - std::vector<std::string> m_words; - boost::unordered_map<std::string, int> m_index; - int unk; - -public: - vocabulary() - { - unk = insert_word("<unk>"); - } + std::vector<std::string> m_words; + boost::unordered_map<std::string, int> m_index; + int unk; + + public: + vocabulary() + { + unk = insert_word("<unk>"); + } - vocabulary(const std::vector<std::string> &words) + vocabulary(const std::vector<std::string> &words) : m_words(words) + { + for (int i=0; i<words.size(); i++) + m_index[words[i]] = i; + unk = m_index["<unk>"]; + } + + int lookup_word(const std::string &word) const + { + boost::unordered_map<std::string, int>::const_iterator pos = m_index.find(word); + if (pos != m_index.end()) + return pos->second; + else + return unk; + } + + // lookup word using custom unknown-word id + int lookup_word(const std::string &word, int unk) const + { + boost::unordered_map<std::string, int>::const_iterator pos = m_index.find(word); + if (pos != m_index.end()) + return pos->second; + else + return unk; + } + + int insert_word(const std::string &word) + { + int i = size(); + bool inserted = m_index.insert(make_pair(word, i)).second; + if (inserted) { - for (int i=0; i<words.size(); i++) - m_index[words[i]] = i; - unk = m_index["<unk>"]; + m_words.push_back(word); } + return i; + } - int lookup_word(const std::string &word) const - { - boost::unordered_map<std::string, int>::const_iterator pos = m_index.find(word); - if (pos != m_index.end()) - return pos->second; - else - return unk; - } + int size() const { return m_words.size(); } - // lookup word using custom unknown-word id - int lookup_word(const std::string &word, int unk) const - { - boost::unordered_map<std::string, int>::const_iterator pos = m_index.find(word); - if (pos != m_index.end()) - return pos->second; - else - return unk; - } - - int insert_word(const std::string &word) - { - int i = size(); - bool inserted = m_index.insert(make_pair(word, i)).second; - if (inserted) - { - m_words.push_back(word); - } - return i; - } + // Inserts the most-frequent words from counts until vocab_size words are reached. + // counts is a collection of pair<string,int> + template <typename Map> + int insert_most_frequent(const Map &counts, int vocab_size) + { + typedef std::pair<std::string,int> stringint; - int size() const { return m_words.size(); } + std::priority_queue<stringint,std::vector<stringint>,compare_second<stringint> > + q(compare_second<stringint>(), std::vector<stringint>(counts.begin(), counts.end())); - // Inserts the most-frequent words from counts until vocab_size words are reached. - // counts is a collection of pair<string,int> - template <typename Map> - int insert_most_frequent(const Map &counts, int vocab_size) + int inserted = 0; + while (size() < vocab_size && !q.empty()) { - typedef std::pair<std::string,int> stringint; - - std::priority_queue<stringint,std::vector<stringint>,compare_second<stringint> > - q(compare_second<stringint>(), std::vector<stringint>(counts.begin(), counts.end())); - - int inserted = 0; - while (size() < vocab_size && !q.empty()) - { - insert_word(q.top().first); - q.pop(); - inserted++; - } - return inserted; + insert_word(q.top().first); + q.pop(); + inserted++; } + return inserted; + } - const std::vector<std::string> &words() const { return m_words; } + const std::vector<std::string> &words() const { return m_words; } - const boost::unordered_map<std::string, int>& get_idmap() const { return m_index; } + const boost::unordered_map<std::string, int>& get_idmap() const { return m_index; } }; } // namespace nplm |