Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Hieu Hoang <hieuhoang@gmail.com> 2016-09-27 20:16:44 +0300
committer: Hieu Hoang <hieuhoang@gmail.com> 2016-09-27 20:16:44 +0300
commit: 24521c035a5078f2833c432f7b0f156316fb7ae1 (patch)
tree: 0526b207fa5a3feb6cf960d5f0ca6ab8449287bf
parent: 73c20f2c1fffaa458450f1ec9bcbe2d27c902fc7 (diff)
import Lookup for pb
-rw-r--r-- contrib/moses2/TranslationModel/Transliteration.cpp 198
-rw-r--r-- contrib/moses2/TranslationModel/Transliteration.h 19
2 files changed, 102 insertions, 115 deletions
diff --git a/contrib/moses2/TranslationModel/Transliteration.cpp b/contrib/moses2/TranslationModel/Transliteration.cpp
index 4dd0a619a..f92348ee9 100644
--- a/contrib/moses2/TranslationModel/Transliteration.cpp
+++ b/contrib/moses2/TranslationModel/Transliteration.cpp
@@ -19,6 +19,8 @@
#include "../SCFG/Manager.h"
#include "../SCFG/Sentence.h"
#include "../SCFG/ActiveChart.h"
+#include "util/tempfile.hh"
+#include "../legacy/Util2.h"
using namespace std;
@@ -28,8 +30,12 @@ namespace Moses2
Transliteration::Transliteration(size_t startInd, const std::string &line) :
PhraseTable(startInd, line)
{
- m_tuneable = false;
ReadParameters();
+ UTIL_THROW_IF2(m_mosesDir.empty() ||
+ m_scriptDir.empty() ||
+ m_externalDir.empty() ||
+ m_inputLang.empty() ||
+ m_outputLang.empty(), "Must specify all arguments");
}
Transliteration::~Transliteration()
@@ -37,33 +43,23 @@ Transliteration::~Transliteration()
// TODO Auto-generated destructor stub
}
-void Transliteration::ProcessXML(
- const Manager &mgr,
- MemPool &pool,
- const Sentence &sentence,
- InputPaths &inputPaths) const
+void
+Transliteration::
+SetParameter(const std::string& key, const std::string& value)
{
- const Vector<const InputType::XMLOption*> &xmlOptions = sentence.GetXMLOptions();
- BOOST_FOREACH(const InputType::XMLOption *xmlOption, xmlOptions) {
- TargetPhraseImpl *target = TargetPhraseImpl::CreateFromString(pool, *this, mgr.system, xmlOption->GetTranslation());
-
- if (xmlOption->prob) {
- Scores &scores = target->GetScores();
- scores.PlusEquals(mgr.system, *this, Moses2::TransformScore(xmlOption->prob));
- }
-
- InputPath *path = inputPaths.GetMatrix().GetValue(xmlOption->startPos, xmlOption->phraseSize - 1);
- const SubPhrase<Moses2::Word> &source = path->subPhrase;
-
- mgr.system.featureFunctions.EvaluateInIsolation(pool, mgr.system, source, *target);
-
- TargetPhrases *tps = new (pool.Allocate<TargetPhrases>()) TargetPhrases(pool, 1);
-
- tps->AddTargetPhrase(*target);
- mgr.system.featureFunctions.EvaluateAfterTablePruning(pool, *tps, source);
-
- path->AddTargetPhrases(*this, tps);
- }
+ if (key == "moses-dir") {
+ m_mosesDir = value;
+ } else if (key == "script-dir") {
+ m_scriptDir = value;
+ } else if (key == "external-dir") {
+ m_externalDir = value;
+ } else if (key == "input-lang") {
+ m_inputLang = value;
+ } else if (key == "output-lang") {
+ m_outputLang = value;
+ } else {
+ PhraseTable::SetParameter(key, value);
+ }
}
void Transliteration::Lookup(const Manager &mgr,
@@ -85,53 +81,87 @@ void Transliteration::Lookup(const Manager &mgr,
TargetPhrases *Transliteration::Lookup(const Manager &mgr, MemPool &pool,
InputPath &inputPath) const
{
- const System &system = mgr.system;
+ const SubPhrase<Moses2::Word> &sourcePhrase = inputPath.subPhrase;
+ size_t hash = sourcePhrase.hash();
+
+ // TRANSLITERATE
+ const util::temp_file inFile;
+ const util::temp_dir outDir;
+
+ ofstream inStream(inFile.path().c_str());
+ inStream << sourcePhrase.Debug(mgr.system) << endl;
+ inStream.close();
+
+ string cmd = m_scriptDir + "/Transliteration/prepare-transliteration-phrase-table.pl" +
+ " --transliteration-model-dir " + m_filePath +
+ " --moses-src-dir " + m_mosesDir +
+ " --external-bin-dir " + m_externalDir +
+ " --input-extension " + m_inputLang +
+ " --output-extension " + m_outputLang +
+ " --oov-file " + inFile.path() +
+ " --out-dir " + outDir.path();
+
+ int ret = system(cmd.c_str());
+ UTIL_THROW_IF2(ret != 0, "Transliteration script error");
+
TargetPhrases *tps = NULL;
+ tps = new (pool.Allocate<TargetPhrases>()) TargetPhrases(pool, 1);
- // any other pt translate this?
- size_t numPt = mgr.system.mappings.size();
- const TargetPhrases **allTPS =
- static_cast<InputPath&>(inputPath).targetPhrases;
- for (size_t i = 0; i < numPt; ++i) {
- const TargetPhrases *otherTps = allTPS[i];
+ vector<TargetPhraseImpl*> targetPhrases
+ = CreateTargetPhrases(mgr, pool, sourcePhrase, outDir.path());
- if (otherTps && otherTps->GetSize()) {
- return tps;
- }
+ vector<TargetPhraseImpl*>::const_iterator iter;
+ for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
+ TargetPhraseImpl *tp = *iter;
+ tps->AddTargetPhrase(*tp);
}
+ mgr.system.featureFunctions.EvaluateAfterTablePruning(pool, *tps, sourcePhrase);
- const SubPhrase<Moses2::Word> &source = inputPath.subPhrase;
- const Moses2::Word &sourceWord = source[0];
- const Factor *factor = sourceWord[0];
+ inputPath.AddTargetPhrases(*this, tps);
+}
- tps = new (pool.Allocate<TargetPhrases>()) TargetPhrases(pool, 1);
+std::vector<TargetPhraseImpl*> Transliteration::CreateTargetPhrases(
+ const Manager &mgr,
+ MemPool &pool,
+ const SubPhrase<Moses2::Word> &sourcePhrase,
+ const std::string &outDir) const
+{
+ std::vector<TargetPhraseImpl*> ret;
- TargetPhraseImpl *target =
- new (pool.Allocate<TargetPhraseImpl>()) TargetPhraseImpl(pool, *this,
- system, 1);
- Moses2::Word &word = (*target)[0];
+ string outPath = outDir + "/out.txt";
+ ifstream outStream(outPath.c_str());
- //FactorCollection &fc = system.vocab;
- //const Factor *factor = fc.AddFactor("SSS", false);
- word[0] = factor;
+ string line;
+ while (getline(outStream, line)) {
+ vector<string> toks = Moses2::Tokenize(line, "\t");
+ UTIL_THROW_IF2(toks.size() != 2, "Error in transliteration output file. Expecting word\tscore");
- Scores &scores = target->GetScores();
- scores.PlusEquals(mgr.system, *this, -100);
+ TargetPhraseImpl *tp =
+ new (pool.Allocate<TargetPhraseImpl>()) TargetPhraseImpl(pool, *this, mgr.system, 1);
+ Moses2::Word &word = (*tp)[0];
+ word.CreateFromString(mgr.system.GetVocab(), mgr.system, toks[0]);
- MemPool &memPool = mgr.GetPool();
- system.featureFunctions.EvaluateInIsolation(memPool, system, source, *target);
+ float score = Scan<float>(toks[1]);
+ tp->GetScores().PlusEquals(mgr.system, *this, score);
- tps->AddTargetPhrase(*target);
- system.featureFunctions.EvaluateAfterTablePruning(memPool, *tps, source);
+ // score of all other ff when this rule is being loaded
+ mgr.system.featureFunctions.EvaluateInIsolation(pool, mgr.system, sourcePhrase, *tp);
+
+ ret.push_back(tp);
+ }
+
+ outStream.close();
+
+ return ret;
- return tps;
}
+
void Transliteration::EvaluateInIsolation(const System &system,
const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
-
+ UTIL_THROW2("Not implemented");
}
// SCFG ///////////////////////////////////////////////////////////////////////////////////////////
@@ -140,6 +170,7 @@ void Transliteration::InitActiveChart(
const SCFG::Manager &mgr,
SCFG::InputPath &path) const
{
+ UTIL_THROW2("Not implemented");
}
void Transliteration::Lookup(MemPool &pool,
@@ -148,59 +179,7 @@ void Transliteration::Lookup(MemPool &pool,
const SCFG::Stacks &stacks,
SCFG::InputPath &path) const
{
- const System &system = mgr.system;
-
- size_t numWords = path.range.GetNumWordsCovered();
- if (numWords > 1) {
- // only create 1 word phrases
- return;
- }
-
- if (path.GetNumRules()) {
- // only create rules if no other rules
- return;
- }
-
- // don't do 1st if 1st word
- if (path.range.GetStartPos() == 0) {
- return;
- }
-
- // don't do 1st if last word
- const SCFG::Sentence &sentence = static_cast<const SCFG::Sentence&>(mgr.GetInput());
- if (path.range.GetStartPos() + 1 == sentence.GetSize()) {
- return;
- }
-
- // terminal
- const SCFG::Word &lastWord = path.subPhrase.Back();
- //cerr << "Transliteration lastWord=" << lastWord << endl;
-
- const Factor *factor = lastWord[0];
- SCFG::TargetPhraseImpl *tp = new (pool.Allocate<SCFG::TargetPhraseImpl>()) SCFG::TargetPhraseImpl(pool, *this, system, 1);
- SCFG::Word &word = (*tp)[0];
- word.CreateFromString(system.GetVocab(), system, factor->GetString().as_string());
-
- tp->lhs.CreateFromString(system.GetVocab(), system, "[X]");
-
- size_t endPos = path.range.GetEndPos();
- const SCFG::InputPath &subPhrasePath = *mgr.GetInputPaths().GetMatrix().GetValue(endPos, 1);
-
- SCFG::ActiveChartEntry *chartEntry = new (pool.Allocate<SCFG::ActiveChartEntry>()) SCFG::ActiveChartEntry(pool);
- chartEntry->AddSymbolBindElement(subPhrasePath.range, lastWord, NULL, *this);
- path.AddActiveChartEntry(GetPtInd(), chartEntry);
-
- Scores &scores = tp->GetScores();
- scores.PlusEquals(mgr.system, *this, -100);
-
- MemPool &memPool = mgr.GetPool();
- const SubPhrase<SCFG::Word> &source = path.subPhrase;
- system.featureFunctions.EvaluateInIsolation(memPool, system, source, *tp);
-
- SCFG::TargetPhrases *tps = new (pool.Allocate<SCFG::TargetPhrases>()) SCFG::TargetPhrases(pool);
- tps->AddTargetPhrase(*tp);
-
- path.AddTargetPhrasesToPath(pool, mgr.system, *this, *tps, chartEntry->GetSymbolBind());
+ UTIL_THROW2("Not implemented");
}
void Transliteration::LookupUnary(MemPool &pool,
@@ -208,6 +187,7 @@ void Transliteration::LookupUnary(MemPool &pool,
const SCFG::Stacks &stacks,
SCFG::InputPath &path) const
{
+ UTIL_THROW2("Not implemented");
}
void Transliteration::LookupNT(
diff --git a/contrib/moses2/TranslationModel/Transliteration.h b/contrib/moses2/TranslationModel/Transliteration.h
index 813bcc149..15f262ac8 100644
--- a/contrib/moses2/TranslationModel/Transliteration.h
+++ b/contrib/moses2/TranslationModel/Transliteration.h
@@ -25,12 +25,6 @@ public:
virtual TargetPhrases *Lookup(const Manager &mgr, MemPool &pool,
InputPath &inputPath) const;
- void ProcessXML(
- const Manager &mgr,
- MemPool &pool,
- const Sentence &sentence,
- InputPaths &inputPaths) const;
-
virtual void
EvaluateInIsolation(const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
@@ -78,6 +72,19 @@ protected:
const Moses2::Hypotheses *hypos,
const Moses2::Range &subPhraseRange,
SCFG::InputPath &outPath) const;
+
+ void SetParameter(const std::string& key, const std::string& value);
+
+protected:
+ std::string m_filePath;
+ std::string m_mosesDir, m_scriptDir, m_externalDir, m_inputLang, m_outputLang;
+
+ std::vector<TargetPhraseImpl*> CreateTargetPhrases(
+ const Manager &mgr,
+ MemPool &pool,
+ const SubPhrase<Moses2::Word> &sourcePhrase,
+ const std::string &outDir) const;
+
};
}