diff options
Diffstat (limited to 'phrase-extract')
-rw-r--r-- | phrase-extract/extract-lex-main.cpp | 9 | ||||
-rw-r--r-- | phrase-extract/extract-lex.h | 53 |
2 files changed, 5 insertions, 57 deletions
diff --git a/phrase-extract/extract-lex-main.cpp b/phrase-extract/extract-lex-main.cpp index f63015a6a..78182396d 100644 --- a/phrase-extract/extract-lex-main.cpp +++ b/phrase-extract/extract-lex-main.cpp @@ -4,6 +4,7 @@ #include <vector> #include "extract-lex.h" #include "InputFileStream.h" +#include "moses/Util.h" using namespace std; using namespace MosesTraining; @@ -53,9 +54,9 @@ int main(int argc, char* argv[]) assert(isAlign); vector<string> toksTarget, toksSource, toksAlign; - Tokenize(toksTarget, lineTarget); - Tokenize(toksSource, lineSource); - Tokenize(toksAlign, lineAlign); + Moses::Tokenize(toksTarget, lineTarget); + Moses::Tokenize(toksSource, lineSource); + Moses::Tokenize(toksAlign, lineAlign); /* cerr << endl @@ -99,7 +100,7 @@ void ExtractLex::Process(vector<string> &toksTarget, vector<string> &toksSource, const string &alignTok = *iterAlign; vector<size_t> alignPos; - Tokenize(alignPos, alignTok, "-"); + Moses::Tokenize(alignPos, alignTok, "-"); assert(alignPos.size() == 2); if (alignPos[0] >= toksSource.size()) { diff --git a/phrase-extract/extract-lex.h b/phrase-extract/extract-lex.h index 044a32cf8..1d49465c8 100644 --- a/phrase-extract/extract-lex.h +++ b/phrase-extract/extract-lex.h @@ -9,59 +9,6 @@ namespace MosesTraining { - -//! convert string to variable of type T. Used to reading floats, int etc from files -template<typename T> -inline T Scan(const std::string &input) -{ - std::stringstream stream(input); - T ret; - stream >> ret; - return ret; -} - - -//! speeded up version of above -template<typename T> -inline void Scan(std::vector<T> &output, const std::vector< std::string > &input) -{ - output.resize(input.size()); - for (size_t i = 0 ; i < input.size() ; i++) { - output[i] = Scan<T>( input[i] ); - } -} - - -inline void Tokenize(std::vector<std::string> &output - , const std::string& str - , const std::string& delimiters = " \t") -{ - // Skip delimiters at beginning. - std::string::size_type lastPos = str.find_first_not_of(delimiters, 0); - // Find first "non-delimiter". - std::string::size_type pos = str.find_first_of(delimiters, lastPos); - - while (std::string::npos != pos || std::string::npos != lastPos) { - // Found a token, add it to the vector. - output.push_back(str.substr(lastPos, pos - lastPos)); - // Skip delimiters. Note the "not_of" - lastPos = str.find_first_not_of(delimiters, pos); - // Find next "non-delimiter" - pos = str.find_first_of(delimiters, lastPos); - } -} - -// speeded up version of above -template<typename T> -inline void Tokenize( std::vector<T> &output - , const std::string &input - , const std::string& delimiters = " \t") -{ - std::vector<std::string> stringVector; - Tokenize(stringVector, input, delimiters); - return Scan<T>(output, stringVector ); -} - class WordCount { friend std::ostream& operator<<(std::ostream&, const WordCount&); |