From 589a79fa094a1d36dfa110d2bbb58be078c7a05b Mon Sep 17 00:00:00 2001 From: graehl Date: Wed, 24 Jun 2015 23:37:11 -0700 Subject: more tab->space --- src/vocabulary.h | 122 +++++++++++++++++++++++++++---------------------------- 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/src/vocabulary.h b/src/vocabulary.h index a987522..fe08d86 100644 --- a/src/vocabulary.h +++ b/src/vocabulary.h @@ -16,80 +16,80 @@ struct compare_second }; class vocabulary { - std::vector m_words; - boost::unordered_map m_index; - int unk; - -public: - vocabulary() - { - unk = insert_word(""); - } + std::vector m_words; + boost::unordered_map m_index; + int unk; + + public: + vocabulary() + { + unk = insert_word(""); + } - vocabulary(const std::vector &words) + vocabulary(const std::vector &words) : m_words(words) + { + for (int i=0; i"]; + } + + int lookup_word(const std::string &word) const + { + boost::unordered_map::const_iterator pos = m_index.find(word); + if (pos != m_index.end()) + return pos->second; + else + return unk; + } + + // lookup word using custom unknown-word id + int lookup_word(const std::string &word, int unk) const + { + boost::unordered_map::const_iterator pos = m_index.find(word); + if (pos != m_index.end()) + return pos->second; + else + return unk; + } + + int insert_word(const std::string &word) + { + int i = size(); + bool inserted = m_index.insert(make_pair(word, i)).second; + if (inserted) { - for (int i=0; i"]; + m_words.push_back(word); } + return i; + } - int lookup_word(const std::string &word) const - { - boost::unordered_map::const_iterator pos = m_index.find(word); - if (pos != m_index.end()) - return pos->second; - else - return unk; - } + int size() const { return m_words.size(); } - // lookup word using custom unknown-word id - int lookup_word(const std::string &word, int unk) const - { - boost::unordered_map::const_iterator pos = m_index.find(word); - if (pos != m_index.end()) - return pos->second; - else - return unk; - } - - int insert_word(const std::string &word) - { - int i = size(); - bool inserted = m_index.insert(make_pair(word, i)).second; - if (inserted) - { - m_words.push_back(word); - } - return i; - } + // Inserts the most-frequent words from counts until vocab_size words are reached. + // counts is a collection of pair + template + int insert_most_frequent(const Map &counts, int vocab_size) + { + typedef std::pair stringint; - int size() const { return m_words.size(); } + std::priority_queue,compare_second > + q(compare_second(), std::vector(counts.begin(), counts.end())); - // Inserts the most-frequent words from counts until vocab_size words are reached. - // counts is a collection of pair - template - int insert_most_frequent(const Map &counts, int vocab_size) + int inserted = 0; + while (size() < vocab_size && !q.empty()) { - typedef std::pair stringint; - - std::priority_queue,compare_second > - q(compare_second(), std::vector(counts.begin(), counts.end())); - - int inserted = 0; - while (size() < vocab_size && !q.empty()) - { - insert_word(q.top().first); - q.pop(); - inserted++; - } - return inserted; + insert_word(q.top().first); + q.pop(); + inserted++; } + return inserted; + } - const std::vector &words() const { return m_words; } + const std::vector &words() const { return m_words; } - const boost::unordered_map& get_idmap() const { return m_index; } + const boost::unordered_map& get_idmap() const { return m_index; } }; } // namespace nplm -- cgit v1.2.3