Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/moses2/TranslationModel/Transliteration.cpp')
-rw-r--r--contrib/moses2/TranslationModel/Transliteration.cpp229
1 files changed, 229 insertions, 0 deletions
diff --git a/contrib/moses2/TranslationModel/Transliteration.cpp b/contrib/moses2/TranslationModel/Transliteration.cpp
new file mode 100644
index 000000000..f92348ee9
--- /dev/null
+++ b/contrib/moses2/TranslationModel/Transliteration.cpp
@@ -0,0 +1,229 @@
+/*
+ * Transliteration.cpp
+ *
+ * Created on: 28 Oct 2015
+ * Author: hieu
+ */
+#include <boost/foreach.hpp>
+#include "Transliteration.h"
+#include "../System.h"
+#include "../Scores.h"
+#include "../InputType.h"
+#include "../PhraseBased/Manager.h"
+#include "../PhraseBased/TargetPhraseImpl.h"
+#include "../PhraseBased/InputPath.h"
+#include "../PhraseBased/TargetPhrases.h"
+#include "../PhraseBased/Sentence.h"
+#include "../SCFG/InputPath.h"
+#include "../SCFG/TargetPhraseImpl.h"
+#include "../SCFG/Manager.h"
+#include "../SCFG/Sentence.h"
+#include "../SCFG/ActiveChart.h"
+#include "util/tempfile.hh"
+#include "../legacy/Util2.h"
+
+using namespace std;
+
+namespace Moses2
+{
+
+Transliteration::Transliteration(size_t startInd, const std::string &line) :
+ PhraseTable(startInd, line)
+{
+ ReadParameters();
+ UTIL_THROW_IF2(m_mosesDir.empty() ||
+ m_scriptDir.empty() ||
+ m_externalDir.empty() ||
+ m_inputLang.empty() ||
+ m_outputLang.empty(), "Must specify all arguments");
+}
+
+Transliteration::~Transliteration()
+{
+ // TODO Auto-generated destructor stub
+}
+
+void
+Transliteration::
+SetParameter(const std::string& key, const std::string& value)
+{
+ if (key == "moses-dir") {
+ m_mosesDir = value;
+ } else if (key == "script-dir") {
+ m_scriptDir = value;
+ } else if (key == "external-dir") {
+ m_externalDir = value;
+ } else if (key == "input-lang") {
+ m_inputLang = value;
+ } else if (key == "output-lang") {
+ m_outputLang = value;
+ } else {
+ PhraseTable::SetParameter(key, value);
+ }
+}
+
+void Transliteration::Lookup(const Manager &mgr,
+ InputPathsBase &inputPaths) const
+{
+ BOOST_FOREACH(InputPathBase *pathBase, inputPaths){
+ InputPath *path = static_cast<InputPath*>(pathBase);
+
+ if (SatisfyBackoff(mgr, *path)) {
+ const SubPhrase<Moses2::Word> &phrase = path->subPhrase;
+
+ TargetPhrases *tps = Lookup(mgr, mgr.GetPool(), *path);
+ path->AddTargetPhrases(*this, tps);
+ }
+ }
+
+}
+
+TargetPhrases *Transliteration::Lookup(const Manager &mgr, MemPool &pool,
+ InputPath &inputPath) const
+{
+ const SubPhrase<Moses2::Word> &sourcePhrase = inputPath.subPhrase;
+ size_t hash = sourcePhrase.hash();
+
+ // TRANSLITERATE
+ const util::temp_file inFile;
+ const util::temp_dir outDir;
+
+ ofstream inStream(inFile.path().c_str());
+ inStream << sourcePhrase.Debug(mgr.system) << endl;
+ inStream.close();
+
+ string cmd = m_scriptDir + "/Transliteration/prepare-transliteration-phrase-table.pl" +
+ " --transliteration-model-dir " + m_filePath +
+ " --moses-src-dir " + m_mosesDir +
+ " --external-bin-dir " + m_externalDir +
+ " --input-extension " + m_inputLang +
+ " --output-extension " + m_outputLang +
+ " --oov-file " + inFile.path() +
+ " --out-dir " + outDir.path();
+
+ int ret = system(cmd.c_str());
+ UTIL_THROW_IF2(ret != 0, "Transliteration script error");
+
+ TargetPhrases *tps = NULL;
+ tps = new (pool.Allocate<TargetPhrases>()) TargetPhrases(pool, 1);
+
+ vector<TargetPhraseImpl*> targetPhrases
+ = CreateTargetPhrases(mgr, pool, sourcePhrase, outDir.path());
+
+ vector<TargetPhraseImpl*>::const_iterator iter;
+ for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
+ TargetPhraseImpl *tp = *iter;
+ tps->AddTargetPhrase(*tp);
+ }
+ mgr.system.featureFunctions.EvaluateAfterTablePruning(pool, *tps, sourcePhrase);
+
+ inputPath.AddTargetPhrases(*this, tps);
+}
+
+std::vector<TargetPhraseImpl*> Transliteration::CreateTargetPhrases(
+ const Manager &mgr,
+ MemPool &pool,
+ const SubPhrase<Moses2::Word> &sourcePhrase,
+ const std::string &outDir) const
+{
+ std::vector<TargetPhraseImpl*> ret;
+
+ string outPath = outDir + "/out.txt";
+ ifstream outStream(outPath.c_str());
+
+ string line;
+ while (getline(outStream, line)) {
+ vector<string> toks = Moses2::Tokenize(line, "\t");
+ UTIL_THROW_IF2(toks.size() != 2, "Error in transliteration output file. Expecting word\tscore");
+
+ TargetPhraseImpl *tp =
+ new (pool.Allocate<TargetPhraseImpl>()) TargetPhraseImpl(pool, *this, mgr.system, 1);
+ Moses2::Word &word = (*tp)[0];
+ word.CreateFromString(mgr.system.GetVocab(), mgr.system, toks[0]);
+
+ float score = Scan<float>(toks[1]);
+ tp->GetScores().PlusEquals(mgr.system, *this, score);
+
+ // score of all other ff when this rule is being loaded
+ mgr.system.featureFunctions.EvaluateInIsolation(pool, mgr.system, sourcePhrase, *tp);
+
+ ret.push_back(tp);
+ }
+
+ outStream.close();
+
+ return ret;
+
+}
+
+
+void Transliteration::EvaluateInIsolation(const System &system,
+ const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
+ SCORE &estimatedScore) const
+{
+ UTIL_THROW2("Not implemented");
+}
+
+// SCFG ///////////////////////////////////////////////////////////////////////////////////////////
+void Transliteration::InitActiveChart(
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ SCFG::InputPath &path) const
+{
+ UTIL_THROW2("Not implemented");
+}
+
+void Transliteration::Lookup(MemPool &pool,
+ const SCFG::Manager &mgr,
+ size_t maxChartSpan,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &path) const
+{
+ UTIL_THROW2("Not implemented");
+}
+
+void Transliteration::LookupUnary(MemPool &pool,
+ const SCFG::Manager &mgr,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &path) const
+{
+ UTIL_THROW2("Not implemented");
+}
+
+void Transliteration::LookupNT(
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const Moses2::Range &subPhraseRange,
+ const SCFG::InputPath &prevPath,
+ const SCFG::Stacks &stacks,
+ SCFG::InputPath &outPath) const
+{
+ UTIL_THROW2("Not implemented");
+}
+
+void Transliteration::LookupGivenWord(
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const SCFG::InputPath &prevPath,
+ const SCFG::Word &wordSought,
+ const Moses2::Hypotheses *hypos,
+ const Moses2::Range &subPhraseRange,
+ SCFG::InputPath &outPath) const
+{
+ UTIL_THROW2("Not implemented");
+}
+
+void Transliteration::LookupGivenNode(
+ MemPool &pool,
+ const SCFG::Manager &mgr,
+ const SCFG::ActiveChartEntry &prevEntry,
+ const SCFG::Word &wordSought,
+ const Moses2::Hypotheses *hypos,
+ const Moses2::Range &subPhraseRange,
+ SCFG::InputPath &outPath) const
+{
+ UTIL_THROW2("Not implemented");
+}
+
+}
+