Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- moses/TranslationModel/TransliterationPhraseDictionary.cpp 80
-rw-r--r-- moses/TranslationModel/TransliterationPhraseDictionary.h 2
-rwxr-xr-x scripts/Transliteration/prepare-transliteration-phrase-table.pl 6
3 files changed, 67 insertions, 21 deletions
diff --git a/moses/TranslationModel/TransliterationPhraseDictionary.cpp b/moses/TranslationModel/TransliterationPhraseDictionary.cpp
index bdc5b3ec7..c8de4ffc0 100644
--- a/moses/TranslationModel/TransliterationPhraseDictionary.cpp
+++ b/moses/TranslationModel/TransliterationPhraseDictionary.cpp
@@ -1,4 +1,5 @@
// vim:tabstop=2
+#include <stdlib.h>
#include "TransliterationPhraseDictionary.h"
#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h"
@@ -19,6 +20,13 @@ void TransliterationPhraseDictionary::CleanUpAfterSentenceProcessing(const Input
void TransliterationPhraseDictionary::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
{
+ string mosesDir = "/home/hieu/workspace/github/mosesdecoder";
+ string scriptDir = mosesDir + "/scripts";
+ string externalDir = "/home/hieu/workspace/bin/training-tools";
+ string modelDir = "/home/hieu/workspace/experiment/data/issues/transliteration/Transliteration.3";
+ string inputLang = "en";
+ string outputLang = "ar";
+
InputPathList::const_iterator iter;
for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
InputPath &inputPath = **iter;
@@ -40,40 +48,76 @@ void TransliterationPhraseDictionary::GetTargetPhraseCollectionBatch(const Input
}
// TRANSLITERATE
- // /home/nadir/mosesdecoder/scripts/Transliteration/prepare-transliteration-phrase-table.pl --transliteration-model-dir /home/nadir/iwslt13-en-ar/model/Transliteration.3 --moses-src-dir /home/nadir/mosesdecoder --external-bin-dir /home/pkoehn/statmt/bin --input-extension en --output-extension ar --oov-file /fs/syn4/nadir/iwslt13-en-ar/evaluation/temp.oov --out-dir /home/nadir/iwslt13-en-ar/model/Transliteration-Phrase-Table.3
- TargetPhrase *tp = CreateTargetPhrase(sourcePhrase);
+ char *ptr = tmpnam(NULL);
+ string inFile(ptr);
+ ptr = tmpnam(NULL);
+ string outDir(ptr);
+
+ ofstream inStream(inFile.c_str());
+ inStream << sourcePhrase.ToString() << endl;
+ inStream.close();
+
+ string cmd = scriptDir + "/Transliteration/prepare-transliteration-phrase-table.pl" +
+ " --transliteration-model-dir " + modelDir +
+ " --moses-src-dir " + mosesDir +
+ " --external-bin-dir " + externalDir +
+ " --input-extension " + inputLang +
+ " --output-extension " + outputLang +
+ " --oov-file " + inFile +
+ " --out-dir " + outDir;
+
+ int ret = system(cmd.c_str());
+ UTIL_THROW_IF2(ret != 0, "Transliteration script error");
+
TargetPhraseCollection *tpColl = new TargetPhraseCollection();
- tpColl->Add(tp);
+ vector<TargetPhrase*> targetPhrases = CreateTargetPhrases(sourcePhrase, outDir);
+ vector<TargetPhrase*>::const_iterator iter;
+ for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
+ TargetPhrase *tp = *iter;
+ tpColl->Add(tp);
+ }
m_allTPColl.push_back(tpColl);
inputPath.SetTargetPhrases(*this, tpColl, NULL);
+ remove(inFile.c_str());
+
+ cmd = "rm -rf " + outDir;
+ system(cmd.c_str());
}
}
}
-TargetPhrase *TransliterationPhraseDictionary::CreateTargetPhrase(const Phrase &sourcePhrase) const
+std::vector<TargetPhrase*> TransliterationPhraseDictionary::CreateTargetPhrases(const Phrase &sourcePhrase, const string &outDir) const
{
- // create a target phrase from the 1st word of the source, prefix with 'TransliterationPhraseDictionary:'
- assert(sourcePhrase.GetSize());
- assert(m_output.size() == 1);
+ std::vector<TargetPhrase*> ret;
+
+ string outPath = outDir + "/out.txt";
+ ifstream outStream(outPath.c_str());
+
+ string line;
+ while (getline(outStream, line)) {
+ vector<string> toks;
+ Tokenize(toks, line, "\t");
+ UTIL_THROW_IF2(toks.size() != 2, "Error in transliteration output file. Expecting word\tscore");
+
+ TargetPhrase *tp = new TargetPhrase();
+ Word &word = tp->AddWord();
+ word.CreateFromString(Output, m_output, toks[0], false);
- string str = sourcePhrase.GetWord(0).GetFactor(0)->GetString().as_string();
- str = "TransliterationPhraseDictionary:" + str;
+ float score = Scan<float>(toks[1]);
+ tp->GetScoreBreakdown().PlusEquals(this, score);
- TargetPhrase *tp = new TargetPhrase();
- Word &word = tp->AddWord();
- word.CreateFromString(Output, m_output, str, false);
+ // score of all other ff when this rule is being loaded
+ tp->Evaluate(sourcePhrase, GetFeaturesToApply());
- // score for this phrase table
- vector<float> scores(m_numScoreComponents, 1.3);
- tp->GetScoreBreakdown().PlusEquals(this, scores);
+ ret.push_back(tp);
+ }
- // score of all other ff when this rule is being loaded
- tp->Evaluate(sourcePhrase, GetFeaturesToApply());
+ outStream.close();
- return tp;
+ return ret;
}
ChartRuleLookupManager* TransliterationPhraseDictionary::CreateRuleLookupManager(const ChartParser &parser,
diff --git a/moses/TranslationModel/TransliterationPhraseDictionary.h b/moses/TranslationModel/TransliterationPhraseDictionary.h
index a5b29c8d8..be027eae0 100644
--- a/moses/TranslationModel/TransliterationPhraseDictionary.h
+++ b/moses/TranslationModel/TransliterationPhraseDictionary.h
@@ -30,7 +30,7 @@ public:
protected:
mutable std::list<TargetPhraseCollection*> m_allTPColl;
- TargetPhrase *CreateTargetPhrase(const Phrase &sourcePhrase) const;
+ std::vector<TargetPhrase*> CreateTargetPhrases(const Phrase &sourcePhrase, const std::string &outDir) const;
};
} // namespace Moses
diff --git a/scripts/Transliteration/prepare-transliteration-phrase-table.pl b/scripts/Transliteration/prepare-transliteration-phrase-table.pl
index a1ca939bf..a96964ac9 100755
--- a/scripts/Transliteration/prepare-transliteration-phrase-table.pl
+++ b/scripts/Transliteration/prepare-transliteration-phrase-table.pl
@@ -129,9 +129,10 @@ sub form_corpus
my $UNK_FILE_NAME = basename($OOV_FILE);
my $target = $EVAL_DIR . "/$UNK_FILE_NAME/training/corpus.$OUTPUT_EXTENSION";
-
+ my $outFile = "$EVAL_DIR/out.txt";
open MYFILE, "<:encoding(UTF-8)", $testFile or die "Can't open $testFile: $!\n";
+ open OUTFILE, ">:encoding(UTF-8)", $outFile or die "Can't open $outFile: $!\n";
while (<MYFILE>)
@@ -162,8 +163,9 @@ sub form_corpus
$i++;
$prob = $words[$i];
- print "$thisStr \t $prob\n";
+ print OUTFILE "$thisStr\t$prob\n";
}
close (MYFILE);
+ close (OUTFILE);
}