Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Matthias Huck <mhuck@inf.ed.ac.uk> 2015-07-24 23:01:13 +0300
committer: Matthias Huck <mhuck@inf.ed.ac.uk> 2015-07-24 23:01:13 +0300
commit: 21aaec0105ffc69c48a4c8977b965af3e05c7a04 (patch)
tree: d9f0e211b769e3eaeb856051a2f92894f4d7a2dd /phrase-extract
parent: 472529ade857a69e01f81cac6675fa7eeb9c2ba9 (diff)
Removed some duplicate code.
Can we move all or parts of moses/Util to util/, and from the Moses namespace to the util namespace? There's quite some common functionality in it that is not only relevant to the decoder, but also to phrase extraction and possibly other parts of the toolkit.
Diffstat (limited to 'phrase-extract')
-rw-r--r-- phrase-extract/extract-lex-main.cpp 9
-rw-r--r-- phrase-extract/extract-lex.h 53
2 files changed, 5 insertions, 57 deletions
diff --git a/phrase-extract/extract-lex-main.cpp b/phrase-extract/extract-lex-main.cpp
index f63015a6a..78182396d 100644
--- a/phrase-extract/extract-lex-main.cpp
+++ b/phrase-extract/extract-lex-main.cpp
@@ -4,6 +4,7 @@
#include <vector>
#include "extract-lex.h"
#include "InputFileStream.h"
+#include "moses/Util.h"
using namespace std;
using namespace MosesTraining;
@@ -53,9 +54,9 @@ int main(int argc, char* argv[])
assert(isAlign);
vector<string> toksTarget, toksSource, toksAlign;
- Tokenize(toksTarget, lineTarget);
- Tokenize(toksSource, lineSource);
- Tokenize(toksAlign, lineAlign);
+ Moses::Tokenize(toksTarget, lineTarget);
+ Moses::Tokenize(toksSource, lineSource);
+ Moses::Tokenize(toksAlign, lineAlign);
/*
cerr << endl
@@ -99,7 +100,7 @@ void ExtractLex::Process(vector<string> &toksTarget, vector<string> &toksSource,
const string &alignTok = *iterAlign;
vector<size_t> alignPos;
- Tokenize(alignPos, alignTok, "-");
+ Moses::Tokenize(alignPos, alignTok, "-");
assert(alignPos.size() == 2);
if (alignPos[0] >= toksSource.size()) {
diff --git a/phrase-extract/extract-lex.h b/phrase-extract/extract-lex.h
index 044a32cf8..1d49465c8 100644
--- a/phrase-extract/extract-lex.h
+++ b/phrase-extract/extract-lex.h
@@ -9,59 +9,6 @@
namespace MosesTraining
{
-
-//! convert string to variable of type T. Used to reading floats, int etc from files
-template<typename T>
-inline T Scan(const std::string &input)
-{
- std::stringstream stream(input);
- T ret;
- stream >> ret;
- return ret;
-}
-
-
-//! speeded up version of above
-template<typename T>
-inline void Scan(std::vector<T> &output, const std::vector< std::string > &input)
-{
- output.resize(input.size());
- for (size_t i = 0 ; i < input.size() ; i++) {
- output[i] = Scan<T>( input[i] );
- }
-}
-
-
-inline void Tokenize(std::vector<std::string> &output
- , const std::string& str
- , const std::string& delimiters = " \t")
-{
- // Skip delimiters at beginning.
- std::string::size_type lastPos = str.find_first_not_of(delimiters, 0);
- // Find first "non-delimiter".
- std::string::size_type pos = str.find_first_of(delimiters, lastPos);
-
- while (std::string::npos != pos || std::string::npos != lastPos) {
- // Found a token, add it to the vector.
- output.push_back(str.substr(lastPos, pos - lastPos));
- // Skip delimiters. Note the "not_of"
- lastPos = str.find_first_not_of(delimiters, pos);
- // Find next "non-delimiter"
- pos = str.find_first_of(delimiters, lastPos);
- }
-}
-
-// speeded up version of above
-template<typename T>
-inline void Tokenize( std::vector<T> &output
- , const std::string &input
- , const std::string& delimiters = " \t")
-{
- std::vector<std::string> stringVector;
- Tokenize(stringVector, input, delimiters);
- return Scan<T>(output, stringVector );
-}
-
class WordCount
{
friend std::ostream& operator<<(std::ostream&, const WordCount&);