Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'phrase-extract')
-rw-r--r-- phrase-extract/extract-lex-main.cpp | 9
-rw-r--r-- phrase-extract/extract-lex.h | 53
2 files changed, 5 insertions, 57 deletions
diff --git a/phrase-extract/extract-lex-main.cpp b/phrase-extract/extract-lex-main.cpp
index f63015a6a..78182396d 100644
--- a/phrase-extract/extract-lex-main.cpp
+++ b/phrase-extract/extract-lex-main.cpp
@@ -4,6 +4,7 @@
#include <vector>
#include "extract-lex.h"
#include "InputFileStream.h"
+#include "moses/Util.h"
using namespace std;
using namespace MosesTraining;
@@ -53,9 +54,9 @@ int main(int argc, char* argv[])
assert(isAlign);
vector<string> toksTarget, toksSource, toksAlign;
- Tokenize(toksTarget, lineTarget);
- Tokenize(toksSource, lineSource);
- Tokenize(toksAlign, lineAlign);
+ Moses::Tokenize(toksTarget, lineTarget);
+ Moses::Tokenize(toksSource, lineSource);
+ Moses::Tokenize(toksAlign, lineAlign);
/*
cerr << endl
@@ -99,7 +100,7 @@ void ExtractLex::Process(vector<string> &toksTarget, vector<string> &toksSource,
const string &alignTok = *iterAlign;
vector<size_t> alignPos;
- Tokenize(alignPos, alignTok, "-");
+ Moses::Tokenize(alignPos, alignTok, "-");
assert(alignPos.size() == 2);
if (alignPos[0] >= toksSource.size()) {
diff --git a/phrase-extract/extract-lex.h b/phrase-extract/extract-lex.h
index 044a32cf8..1d49465c8 100644
--- a/phrase-extract/extract-lex.h
+++ b/phrase-extract/extract-lex.h
@@ -9,59 +9,6 @@
namespace MosesTraining
{
-
-//! convert string to variable of type T. Used to reading floats, int etc from files
-template<typename T>
-inline T Scan(const std::string &input)
-{
- std::stringstream stream(input);
- T ret;
- stream >> ret;
- return ret;
-}
-
-
-//! speeded up version of above
-template<typename T>
-inline void Scan(std::vector<T> &output, const std::vector< std::string > &input)
-{
- output.resize(input.size());
- for (size_t i = 0 ; i < input.size() ; i++) {
- output[i] = Scan<T>( input[i] );
- }
-}
-
-
-inline void Tokenize(std::vector<std::string> &output
- , const std::string& str
- , const std::string& delimiters = " \t")
-{
- // Skip delimiters at beginning.
- std::string::size_type lastPos = str.find_first_not_of(delimiters, 0);
- // Find first "non-delimiter".
- std::string::size_type pos = str.find_first_of(delimiters, lastPos);
-
- while (std::string::npos != pos || std::string::npos != lastPos) {
- // Found a token, add it to the vector.
- output.push_back(str.substr(lastPos, pos - lastPos));
- // Skip delimiters. Note the "not_of"
- lastPos = str.find_first_not_of(delimiters, pos);
- // Find next "non-delimiter"
- pos = str.find_first_of(delimiters, lastPos);
- }
-}
-
-// speeded up version of above
-template<typename T>
-inline void Tokenize( std::vector<T> &output
- , const std::string &input
- , const std::string& delimiters = " \t")
-{
- std::vector<std::string> stringVector;
- Tokenize(stringVector, input, delimiters);
- return Scan<T>(output, stringVector );
-}
-
class WordCount
{
friend std::ostream& operator<<(std::ostream&, const WordCount&);