Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHieu Hoang <hieu@hoang.co.uk>2014-02-28 17:59:37 +0400
committerHieu Hoang <hieu@hoang.co.uk>2014-02-28 17:59:37 +0400
commit16761ec96666b657c90575ea5aa1ebb9f42a039e (patch)
tree8188683b4332396dfb1715eb5ac2b7c127216fbe /contrib/other-builds/manual-label
parent859fea99ee96cab762ab6f54a6744bb5c3564609 (diff)
use open nlp chunker
Diffstat (limited to 'contrib/other-builds/manual-label')
-rw-r--r--contrib/other-builds/manual-label/DeEn.cpp2
-rw-r--r--contrib/other-builds/manual-label/DeEn.h2
-rw-r--r--contrib/other-builds/manual-label/EnOpenNLPChunker.cpp24
-rw-r--r--contrib/other-builds/manual-label/EnOpenNLPChunker.h21
-rw-r--r--contrib/other-builds/manual-label/EnPhrasalVerb.h3
-rw-r--r--contrib/other-builds/manual-label/Main.cpp (renamed from contrib/other-builds/manual-label/manual-label.cpp)61
-rw-r--r--contrib/other-builds/manual-label/Main.h (renamed from contrib/other-builds/manual-label/manual-label.h)0
7 files changed, 89 insertions, 24 deletions
diff --git a/contrib/other-builds/manual-label/DeEn.cpp b/contrib/other-builds/manual-label/DeEn.cpp
index 03aa57e92..460bee367 100644
--- a/contrib/other-builds/manual-label/DeEn.cpp
+++ b/contrib/other-builds/manual-label/DeEn.cpp
@@ -1,6 +1,6 @@
#include <list>
#include "DeEn.h"
-#include "manual-label.h"
+#include "Main.h"
#include "moses/Util.h"
using namespace std;
diff --git a/contrib/other-builds/manual-label/DeEn.h b/contrib/other-builds/manual-label/DeEn.h
index 531b56721..c24ce0079 100644
--- a/contrib/other-builds/manual-label/DeEn.h
+++ b/contrib/other-builds/manual-label/DeEn.h
@@ -1,5 +1,5 @@
#pragma once
-#include "manual-label.h"
+#include "Main.h"
void LabelDeEn(const Phrase &source, std::ostream &out);
diff --git a/contrib/other-builds/manual-label/EnOpenNLPChunker.cpp b/contrib/other-builds/manual-label/EnOpenNLPChunker.cpp
new file mode 100644
index 000000000..7ac5b49b1
--- /dev/null
+++ b/contrib/other-builds/manual-label/EnOpenNLPChunker.cpp
@@ -0,0 +1,24 @@
+/*
+ * EnOpenNLPChunker.cpp
+ *
+ * Created on: 28 Feb 2014
+ * Author: hieu
+ */
+
+#include "EnOpenNLPChunker.h"
+
+EnOpenNLPChunker::EnOpenNLPChunker(const std::string &openNLPPath)
+:m_openNLPPath(openNLPPath)
+{
+ // Only copies openNLPPath into m_openNLPPath (initialiser list above); the body does no further setup.
+
+}
+
+EnOpenNLPChunker::~EnOpenNLPChunker() {
+ // Empty destructor body: no explicit cleanup is performed here.
+}
+
+void EnOpenNLPChunker::Process(std::istream &in, std::ostream &out)
+{
+ // Stub: the body is empty, so nothing is read from `in` and nothing is written to `out`.
+}
diff --git a/contrib/other-builds/manual-label/EnOpenNLPChunker.h b/contrib/other-builds/manual-label/EnOpenNLPChunker.h
new file mode 100644
index 000000000..773a5017a
--- /dev/null
+++ b/contrib/other-builds/manual-label/EnOpenNLPChunker.h
@@ -0,0 +1,21 @@
+/*
+ * EnOpenNLPChunker.h
+ *
+ * Created on: 28 Feb 2014
+ * Author: hieu
+ */
+
+#pragma once
+
+#include <string>
+#include <iostream>
+
+class EnOpenNLPChunker { // Holds a path string and exposes a stream-to-stream Process() entry point.
+public:
+ EnOpenNLPChunker(const std::string &openNLPPath); // openNLPPath is copied into m_openNLPPath
+ virtual ~EnOpenNLPChunker();
+ void Process(std::istream &in, std::ostream &out); // takes an input and an output stream; returns nothing
+protected:
+ const std::string m_openNLPPath; // const copy of the constructor argument; never reassigned
+};
+
diff --git a/contrib/other-builds/manual-label/EnPhrasalVerb.h b/contrib/other-builds/manual-label/EnPhrasalVerb.h
index 0f184257b..4cb5f7348 100644
--- a/contrib/other-builds/manual-label/EnPhrasalVerb.h
+++ b/contrib/other-builds/manual-label/EnPhrasalVerb.h
@@ -1,7 +1,8 @@
#pragma once
-#include "manual-label.h"
+#include "Main.h"
+// roll your own identification of phrasal verbs
void EnPhrasalVerb(const Phrase &source, int revision, std::ostream &out);
bool Exist(const Phrase &source, int start, int end, int factor, const std::string &str);
diff --git a/contrib/other-builds/manual-label/manual-label.cpp b/contrib/other-builds/manual-label/Main.cpp
index d81b7c1e1..e9018ffe5 100644
--- a/contrib/other-builds/manual-label/manual-label.cpp
+++ b/contrib/other-builds/manual-label/Main.cpp
@@ -2,9 +2,10 @@
#include <cstdlib>
#include <boost/program_options.hpp>
#include "moses/Util.h"
-#include "manual-label.h"
+#include "Main.h"
#include "DeEn.h"
#include "EnPhrasalVerb.h"
+#include "EnOpenNLPChunker.h"
using namespace std;
@@ -23,7 +24,11 @@ int main(int argc, char** argv)
("add", "additional options")
("source-language,s", po::value<string>()->required(), "Source Language")
("target-language,t", po::value<string>()->required(), "Target Language")
- ("revision,r", po::value<int>()->default_value(0), "Revision");
+ ("revision,r", po::value<int>()->default_value(0), "Revision")
+
+ ("opennlp-path", po::value<int>()->default_value(0), "Path to Apache OpenNLP toolkit")
+
+ ;
po::variables_map vm;
try
@@ -56,27 +61,41 @@ int main(int argc, char** argv)
cerr << sourceLang << " " << targetLang << " " << revision << endl;
- string line;
- size_t lineNum = 1;
-
- while (getline(cin, line)) {
- //cerr << lineNum << ":" << line << endl;
- if (lineNum % 1000 == 0) {
- cerr << lineNum << " ";
- }
-
- Phrase source = Tokenize(line);
- if (sourceLang == "de" && targetLang == "en") {
- LabelDeEn(source, cout);
- }
- else if (sourceLang == "en") {
- EnPhrasalVerb(source, revision, cout);
- }
-
- ++lineNum;
+ if (sourceLang == "en" && revision == 2) {
+ string openNLPPath = vm["opennlp-path"].as<string>();
+ EnOpenNLPChunker chunker(openNLPPath);
+ chunker.Process(cin, cout);
+ }
+ else {
+ // process line-by-line
+ string line;
+ size_t lineNum = 1;
+
+ while (getline(cin, line)) {
+ //cerr << lineNum << ":" << line << endl;
+ if (lineNum % 1000 == 0) {
+ cerr << lineNum << " ";
+ }
+
+ Phrase source = Tokenize(line);
+
+ if (sourceLang == "de" && targetLang == "en") {
+ LabelDeEn(source, cout);
+ }
+ else if (sourceLang == "en") {
+ if (revision == 0 || revision == 1) {
+ EnPhrasalVerb(source, revision, cout);
+ }
+ else if (revision == 2) {
+ string openNLPPath = vm["opennlp-path"].as<string>();
+ EnOpenNLPChunker chunker(openNLPPath);
+ }
+ }
+
+ ++lineNum;
+ }
}
-
cerr << "Finished" << endl;
diff --git a/contrib/other-builds/manual-label/manual-label.h b/contrib/other-builds/manual-label/Main.h
index bc93fe003..bc93fe003 100644
--- a/contrib/other-builds/manual-label/manual-label.h
+++ b/contrib/other-builds/manual-label/Main.h