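Removed some duplicate code: the local Scan/Tokenize helpers in phrase-extract/extract-lex.h are dropped in favour of Moses::Tokenize from moses/Util.h.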

Can we move all or part of moses/Util to util/, and from the Moses namespace to the util namespace? It contains quite a lot of common functionality that is relevant not only to the decoder, but also to phrase extraction and possibly other parts of the toolkit.
author: Matthias Huck <mhuck@inf.ed.ac.uk> 2015-07-24 23:01:13 +0300
committer: Matthias Huck <mhuck@inf.ed.ac.uk> 2015-07-24 23:01:13 +0300
commit: 21aaec0105ffc69c48a4c8977b965af3e05c7a04 (patch)
tree: d9f0e211b769e3eaeb856051a2f92894f4d7a2dd /phrase-extract
parent: 472529ade857a69e01f81cac6675fa7eeb9c2ba9 (diff)
2 files changed, 5 insertions, 57 deletions
diff --git a/phrase-extract/extract-lex-main.cpp b/phrase-extract/extract-lex-main.cpp
index f63015a6a..78182396d 100644
--- a/phrase-extract/extract-lex-main.cpp
+++ b/phrase-extract/extract-lex-main.cpp
@@ -4,6 +4,7 @@
 #include <vector>
 #include "extract-lex.h"
 #include "InputFileStream.h"
+#include "moses/Util.h"
 
 using namespace std;
 using namespace MosesTraining;
@@ -53,9 +54,9 @@ int main(int argc, char* argv[])
     assert(isAlign);
 
     vector<string> toksTarget, toksSource, toksAlign;
-    Tokenize(toksTarget, lineTarget);
-    Tokenize(toksSource, lineSource);
-    Tokenize(toksAlign, lineAlign);
+    Moses::Tokenize(toksTarget, lineTarget);
+    Moses::Tokenize(toksSource, lineSource);
+    Moses::Tokenize(toksAlign, lineAlign);
 
     /*
     cerr  << endl
@@ -99,7 +100,7 @@ void ExtractLex::Process(vector<string> &toksTarget, vector<string> &toksSource,
     const string &alignTok = *iterAlign;
 
     vector<size_t> alignPos;
-    Tokenize(alignPos, alignTok, "-");
+    Moses::Tokenize(alignPos, alignTok, "-");
     assert(alignPos.size() == 2);
 
     if (alignPos[0] >= toksSource.size()) {
diff --git a/phrase-extract/extract-lex.h b/phrase-extract/extract-lex.h
index 044a32cf8..1d49465c8 100644
--- a/phrase-extract/extract-lex.h
+++ b/phrase-extract/extract-lex.h
@@ -9,59 +9,6 @@
 namespace MosesTraining
 {
 
-
-//! convert string to variable of type T. Used to reading floats, int etc from files
-template<typename T>
-inline T Scan(const std::string &input)
-{
-  std::stringstream stream(input);
-  T ret;
-  stream >> ret;
-  return ret;
-}
-
-
-//! speeded up version of above
-template<typename T>
-inline void Scan(std::vector<T> &output, const std::vector< std::string > &input)
-{
-  output.resize(input.size());
-  for (size_t i = 0 ; i < input.size() ; i++) {
-    output[i] = Scan<T>( input[i] );
-  }
-}
-
-
-inline void Tokenize(std::vector<std::string> &output
-                     , const std::string& str
-                     , const std::string& delimiters = " \t")
-{
-  // Skip delimiters at beginning.
-  std::string::size_type lastPos = str.find_first_not_of(delimiters, 0);
-  // Find first "non-delimiter".
-  std::string::size_type pos     = str.find_first_of(delimiters, lastPos);
-
-  while (std::string::npos != pos || std::string::npos != lastPos) {
-    // Found a token, add it to the vector.
-    output.push_back(str.substr(lastPos, pos - lastPos));
-    // Skip delimiters.  Note the "not_of"
-    lastPos = str.find_first_not_of(delimiters, pos);
-    // Find next "non-delimiter"
-    pos = str.find_first_of(delimiters, lastPos);
-  }
-}
-
-// speeded up version of above
-template<typename T>
-inline void Tokenize( std::vector<T> &output
-                      , const std::string &input
-                      , const std::string& delimiters = " \t")
-{
-  std::vector<std::string> stringVector;
-  Tokenize(stringVector, input, delimiters);
-  return Scan<T>(output, stringVector );
-}
-
 class WordCount
 {
   friend std::ostream& operator<<(std::ostream&, const WordCount&);
author	Matthias Huck <mhuck@inf.ed.ac.uk>	2015-07-24 23:01:13 +0300
committer	Matthias Huck <mhuck@inf.ed.ac.uk>	2015-07-24 23:01:13 +0300
commit	21aaec0105ffc69c48a4c8977b965af3e05c7a04 (patch)
tree	d9f0e211b769e3eaeb856051a2f92894f4d7a2dd /phrase-extract
parent	472529ade857a69e01f81cac6675fa7eeb9c2ba9 (diff)