From 21aaec0105ffc69c48a4c8977b965af3e05c7a04 Mon Sep 17 00:00:00 2001
From: Matthias Huck <mhuck@inf.ed.ac.uk>
Date: Fri, 24 Jul 2015 21:01:13 +0100
Subject: Removed some duplicate code.

Can we move all or part of moses/Util to util/, and move it
from the Moses namespace to the util namespace?

There's quite a lot of common functionality in it that is relevant not
only to the decoder, but also to phrase extraction and possibly other
parts of the toolkit.
---
 phrase-extract/extract-lex-main.cpp |  9 ++++---
 phrase-extract/extract-lex.h        | 53 -------------------------------------
 2 files changed, 5 insertions(+), 57 deletions(-)

(limited to 'phrase-extract')
diff --git a/phrase-extract/extract-lex-main.cpp b/phrase-extract/extract-lex-main.cpp
index f63015a6a..78182396d 100644
--- a/phrase-extract/extract-lex-main.cpp
+++ b/phrase-extract/extract-lex-main.cpp
@@ -4,6 +4,7 @@
 #include <vector>
 #include "extract-lex.h"
 #include "InputFileStream.h"
+#include "moses/Util.h"
 
 using namespace std;
 using namespace MosesTraining;
@@ -53,9 +54,9 @@ int main(int argc, char* argv[])
     assert(isAlign);
 
     vector<string> toksTarget, toksSource, toksAlign;
-    Tokenize(toksTarget, lineTarget);
-    Tokenize(toksSource, lineSource);
-    Tokenize(toksAlign, lineAlign);
+    Moses::Tokenize(toksTarget, lineTarget);
+    Moses::Tokenize(toksSource, lineSource);
+    Moses::Tokenize(toksAlign, lineAlign);
 
     /*
     cerr  << endl
@@ -99,7 +100,7 @@ void ExtractLex::Process(vector<string> &toksTarget, vector<string> &toksSource,
     const string &alignTok = *iterAlign;
 
     vector<size_t> alignPos;
-    Tokenize(alignPos, alignTok, "-");
+    Moses::Tokenize(alignPos, alignTok, "-");
     assert(alignPos.size() == 2);
 
     if (alignPos[0] >= toksSource.size()) {
diff --git a/phrase-extract/extract-lex.h b/phrase-extract/extract-lex.h
index 044a32cf8..1d49465c8 100644
--- a/phrase-extract/extract-lex.h
+++ b/phrase-extract/extract-lex.h
@@ -9,59 +9,6 @@
 namespace MosesTraining
 {
 
-
-//! convert string to variable of type T. Used to reading floats, int etc from files
-template<typename T>
-inline T Scan(const std::string &input)
-{
-  std::stringstream stream(input);
-  T ret;
-  stream >> ret;
-  return ret;
-}
-
-
-//! speeded up version of above
-template<typename T>
-inline void Scan(std::vector<T> &output, const std::vector< std::string > &input)
-{
-  output.resize(input.size());
-  for (size_t i = 0 ; i < input.size() ; i++) {
-    output[i] = Scan<T>( input[i] );
-  }
-}
-
-
-inline void Tokenize(std::vector<std::string> &output
-                     , const std::string& str
-                     , const std::string& delimiters = " \t")
-{
-  // Skip delimiters at beginning.
-  std::string::size_type lastPos = str.find_first_not_of(delimiters, 0);
-  // Find first "non-delimiter".
-  std::string::size_type pos     = str.find_first_of(delimiters, lastPos);
-
-  while (std::string::npos != pos || std::string::npos != lastPos) {
-    // Found a token, add it to the vector.
-    output.push_back(str.substr(lastPos, pos - lastPos));
-    // Skip delimiters.  Note the "not_of"
-    lastPos = str.find_first_not_of(delimiters, pos);
-    // Find next "non-delimiter"
-    pos = str.find_first_of(delimiters, lastPos);
-  }
-}
-
-// speeded up version of above
-template<typename T>
-inline void Tokenize( std::vector<T> &output
-                      , const std::string &input
-                      , const std::string& delimiters = " \t")
-{
-  std::vector<std::string> stringVector;
-  Tokenize(stringVector, input, delimiters);
-  return Scan<T>(output, stringVector );
-}
-
 class WordCount
 {
   friend std::ostream& operator<<(std::ostream&, const WordCount&);
-- 
cgit v1.2.3