diff options
author | Hieu Hoang <fishandfrolick@gmail.com> | 2012-06-01 02:32:24 +0400 |
---|---|---|
committer | Hieu Hoang <fishandfrolick@gmail.com> | 2012-06-01 02:32:24 +0400 |
commit | 417d5ee57ba980b4a88031550163a218bf9c5a1c (patch) | |
tree | 29662a0db328b4fc5fa3ece97cb5bbf931d79fee /biconcor/Vocabulary.h | |
parent | 48faedb3bdfe1507d19cf2b76fbc81f3be2ff244 (diff) |
move biconcor to /
Diffstat (limited to 'biconcor/Vocabulary.h')
-rw-r--r-- | biconcor/Vocabulary.h | 39 |
1 files changed, 39 insertions, 0 deletions
// Recovered from the diff that adds biconcor/Vocabulary.h (new file, mode 100644).
// $Id: tables-core.h 1470 2007-10-02 21:43:54Z redpony $

#pragma once

#include <iostream>
#include <cstdlib>
#include <string>
#include <map>
#include <vector>

// SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM)
//
// Reads one _DELIM-terminated record from the input stream _IS into the
// character buffer _LINE, passing _SIZE as the buffer capacity to getline().
//
//  * If the read left the stream in a plain "fail" state (neither bad nor
//    eof), the error flags are cleared.
//  * If the read consumed exactly _SIZE-1 characters, the line is treated as
//    too long for the buffer: a message is written to std::cerr and the
//    process terminates with exit status 1.
//
// Every argument is parenthesized in the expansion so that expressions such
// as `*streamPtr` or `cond ? a : b` can be passed safely. The arguments are
// evaluated more than once, so they must not have side effects.
//
// The expansion is a brace-enclosed block (not do/while), so it should not be
// used as the unbraced body of an `if` that is followed by `else`.
#define SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM) { \
    (_IS).getline((_LINE), (_SIZE), (_DELIM)); \
    if ((_IS).fail() && !(_IS).bad() && !(_IS).eof()) (_IS).clear(); \
    if ((_IS).gcount() == (_SIZE) - 1) { \
      std::cerr << "Line too long! Buffer overflow. Delete lines >=" \
                << (_SIZE) << " chars or raise MAX_LENGTH in phrase-extract/tables-core.cpp" \
                << std::endl; \
      std::exit(1); \
    } \
  }

// A word is stored as a plain string and identified by an unsigned integer.
typedef std::string WORD;
typedef unsigned int WORD_ID;

// Bidirectional word <-> id table.
//
// `lookup` maps a word to its id and `vocab` holds the words indexed by id.
// Only GetWord() is defined in this header; the remaining member functions
// are declared here and implemented elsewhere, so the notes on them below
// are inferred from their names and signatures -- verify against the
// implementation file.
class Vocabulary
{
public:
  std::map<WORD, WORD_ID> lookup;  // word -> id
  std::vector< WORD > vocab;       // id -> word

  // Presumably returns the id of the word, adding it first when unknown.
  WORD_ID StoreIfNew( const WORD& );

  // Presumably returns the id of an already stored word (const: no insertion).
  WORD_ID GetWordID( const WORD& ) const;

  // Presumably splits a C string into words and returns their ids.
  std::vector<WORD_ID> Tokenize( const char[] );

  // Returns a mutable reference to the word stored at index `id` of `vocab`.
  //
  // The function is const yet hands out a non-const reference, so constness
  // is removed deliberately with an explicit const_cast. `id` is not range
  // checked; it must be a valid index into `vocab`.
  inline WORD &GetWord( WORD_ID id ) const {
    return const_cast<WORD&>( vocab[ id ] );
  }

  // Presumably write / read the vocabulary to / from the named file.
  void Save(const std::string& fileName ) const;
  void Load(const std::string& fileName );
};