Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Hieu Hoang <fishandfrolick@gmail.com> 2012-06-01 02:32:24 +0400
committer Hieu Hoang <fishandfrolick@gmail.com> 2012-06-01 02:32:24 +0400
commit 417d5ee57ba980b4a88031550163a218bf9c5a1c (patch)
tree 29662a0db328b4fc5fa3ece97cb5bbf931d79fee /biconcor/Vocabulary.h
parent 48faedb3bdfe1507d19cf2b76fbc81f3be2ff244 (diff)
move biconcor to /
Diffstat (limited to 'biconcor/Vocabulary.h')
-rw-r--r-- biconcor/Vocabulary.h 39
1 file changed, 39 insertions, 0 deletions
diff --git a/biconcor/Vocabulary.h b/biconcor/Vocabulary.h
new file mode 100644
index 000000000..674912006
--- /dev/null
+++ b/biconcor/Vocabulary.h
@@ -0,0 +1,39 @@
+// $Id: tables-core.h 1470 2007-10-02 21:43:54Z redpony $
+
+#pragma once
+
+#include <iostream>
+#include <cstdlib>
+#include <string>
+#include <map>
+#include <vector>
+
+// SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM)
+//
+// Reads one _DELIM-terminated line from the input stream _IS into the
+// character buffer _LINE, whose capacity is _SIZE chars, by calling
+// _IS.getline(_LINE, _SIZE, _DELIM).
+//
+// If the line did not fit (getline stored _SIZE-1 characters and set
+// failbit without reaching the delimiter or end of file), an error is
+// printed to std::cerr and the program terminates via std::exit(1).
+// Any other recoverable failbit (neither badbit nor eofbit set) is cleared
+// so that reading can continue.
+//
+// Notes:
+//  - The truncation test is made only when getline reported failbit. A line
+//    of exactly _SIZE-2 characters also yields gcount() == _SIZE-1 (the
+//    extracted delimiter is counted) but fits in the buffer, so it must not
+//    be reported as too long.
+//  - Every argument is parenthesised in the expansion, so expressions such
+//    as *streamPtr can be passed. Arguments are evaluated more than once;
+//    do not pass expressions with side effects.
+//  - The expansion is still a plain { } block, exactly as before, so the
+//    way existing call sites are written is unaffected.
+#define SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM) { \
+  (_IS).getline((_LINE), (_SIZE), (_DELIM)); \
+  if ((_IS).fail() && !(_IS).bad() && !(_IS).eof()) { \
+    /* clear() resets the state flags only; gcount() is left untouched */ \
+    (_IS).clear(); \
+    if ((_IS).gcount() == (_SIZE) - 1) { \
+      std::cerr << "Line too long! Buffer overflow. Delete lines >=" \
+                << (_SIZE) << " chars or raise MAX_LENGTH in phrase-extract/tables-core.cpp" \
+                << std::endl; \
+      std::exit(1); \
+    } \
+  } \
+}
+
+// A vocabulary entry: the word itself, held as a string.
+typedef std::string WORD;
+// Numeric identifier of a word; GetWord() uses it as an index into
+// Vocabulary::vocab.
+typedef unsigned int WORD_ID;
+
+// Vocabulary holds two public containers relating words and numeric ids:
+// a WORD -> WORD_ID map and a vector of WORDs indexed by WORD_ID.
+// Only GetWord() is defined in this header; all other member functions are
+// declared here and implemented elsewhere.
+class Vocabulary
+{
+public:
+  // Maps a word to its id.
+  std::map<WORD, WORD_ID> lookup;
+  // Words stored by position; GetWord(id) returns vocab[id].
+  std::vector< WORD > vocab;
+
+  // NOTE(review): presumably returns the id of the given word, adding the
+  // word first if it is not yet known -- confirm against the implementation.
+  WORD_ID StoreIfNew( const WORD& );
+  // NOTE(review): presumably looks up the id of the given word without
+  // modifying the vocabulary (the function is const) -- confirm the result
+  // for unknown words against the implementation.
+  WORD_ID GetWordID( const WORD& ) const;
+  // NOTE(review): presumably splits the given C string into words and
+  // returns their ids; non-const, so it may add new words -- confirm.
+  std::vector<WORD_ID> Tokenize( const char[] );
+
+  // Returns a mutable reference to the word stored at index `id` in vocab.
+  // No bounds check is performed: `id` must be less than vocab.size().
+  // The function is const yet hands out a non-const reference; the existing
+  // signature is kept for callers, and the removal of constness is spelled
+  // out with const_cast instead of being hidden in a C-style cast.
+  inline WORD &GetWord( WORD_ID id ) const {
+    return const_cast<WORD&>( vocab[ id ] );
+  }
+
+  // Save/Load take the name of a file; their on-disk format is defined by
+  // the implementation, which is not part of this header.
+  void Save(const std::string& fileName ) const;
+  void Load(const std::string& fileName );
+};