diff options
author | Matthias Huck <mhuck@inf.ed.ac.uk> | 2015-07-24 23:01:13 +0300 |
---|---|---|
committer | Matthias Huck <mhuck@inf.ed.ac.uk> | 2015-07-24 23:01:13 +0300 |
commit | 21aaec0105ffc69c48a4c8977b965af3e05c7a04 (patch) | |
tree | d9f0e211b769e3eaeb856051a2f92894f4d7a2dd | |
parent | 472529ade857a69e01f81cac6675fa7eeb9c2ba9 (diff) |
Removed some duplicate code.
Can we move all or parts of moses/Util to util/, and
from the Moses namespace to the util namespace?
It contains quite a lot of common functionality that is relevant
not only to the decoder, but also to phrase extraction and possibly
to other parts of the toolkit.
-rw-r--r-- | phrase-extract/extract-lex-main.cpp | 9 | ||||
-rw-r--r-- | phrase-extract/extract-lex.h | 53 |
2 files changed, 5 insertions, 57 deletions
diff --git a/phrase-extract/extract-lex-main.cpp b/phrase-extract/extract-lex-main.cpp index f63015a6a..78182396d 100644 --- a/phrase-extract/extract-lex-main.cpp +++ b/phrase-extract/extract-lex-main.cpp @@ -4,6 +4,7 @@ #include <vector> #include "extract-lex.h" #include "InputFileStream.h" +#include "moses/Util.h" using namespace std; using namespace MosesTraining; @@ -53,9 +54,9 @@ int main(int argc, char* argv[]) assert(isAlign); vector<string> toksTarget, toksSource, toksAlign; - Tokenize(toksTarget, lineTarget); - Tokenize(toksSource, lineSource); - Tokenize(toksAlign, lineAlign); + Moses::Tokenize(toksTarget, lineTarget); + Moses::Tokenize(toksSource, lineSource); + Moses::Tokenize(toksAlign, lineAlign); /* cerr << endl @@ -99,7 +100,7 @@ void ExtractLex::Process(vector<string> &toksTarget, vector<string> &toksSource, const string &alignTok = *iterAlign; vector<size_t> alignPos; - Tokenize(alignPos, alignTok, "-"); + Moses::Tokenize(alignPos, alignTok, "-"); assert(alignPos.size() == 2); if (alignPos[0] >= toksSource.size()) { diff --git a/phrase-extract/extract-lex.h b/phrase-extract/extract-lex.h index 044a32cf8..1d49465c8 100644 --- a/phrase-extract/extract-lex.h +++ b/phrase-extract/extract-lex.h @@ -9,59 +9,6 @@ namespace MosesTraining { - -//! convert string to variable of type T. Used to reading floats, int etc from files -template<typename T> -inline T Scan(const std::string &input) -{ - std::stringstream stream(input); - T ret; - stream >> ret; - return ret; -} - - -//! speeded up version of above -template<typename T> -inline void Scan(std::vector<T> &output, const std::vector< std::string > &input) -{ - output.resize(input.size()); - for (size_t i = 0 ; i < input.size() ; i++) { - output[i] = Scan<T>( input[i] ); - } -} - - -inline void Tokenize(std::vector<std::string> &output - , const std::string& str - , const std::string& delimiters = " \t") -{ - // Skip delimiters at beginning. 
- std::string::size_type lastPos = str.find_first_not_of(delimiters, 0); - // Find first "non-delimiter". - std::string::size_type pos = str.find_first_of(delimiters, lastPos); - - while (std::string::npos != pos || std::string::npos != lastPos) { - // Found a token, add it to the vector. - output.push_back(str.substr(lastPos, pos - lastPos)); - // Skip delimiters. Note the "not_of" - lastPos = str.find_first_not_of(delimiters, pos); - // Find next "non-delimiter" - pos = str.find_first_of(delimiters, lastPos); - } -} - -// speeded up version of above -template<typename T> -inline void Tokenize( std::vector<T> &output - , const std::string &input - , const std::string& delimiters = " \t") -{ - std::vector<std::string> stringVector; - Tokenize(stringVector, input, delimiters); - return Scan<T>(output, stringVector ); -} - class WordCount { friend std::ostream& operator<<(std::ostream&, const WordCount&); |