diff options
3 files changed, 67 insertions, 21 deletions
// NOTE(review): layout restored from a collapsed dump of this patch. Tokens are
//   unchanged; indentation is reconstructed, and whitespace-only context lines
//   (invisible in the collapsed text) are placed by best guess so that every
//   hunk adds up to its "@@" line counts.
//
// TransliterationPhraseDictionary.cpp
//  * GetTargetPhraseCollectionBatch() now writes sourcePhrase.ToString() to a
//    file named by tmpnam(), builds a command line for
//    scripts/Transliteration/prepare-transliteration-phrase-table.pl, runs it
//    with system(), and reports a non-zero result through UTIL_THROW_IF2.
//  * Every TargetPhrase returned by CreateTargetPhrases() is added to a fresh
//    TargetPhraseCollection; afterwards the input file is remove()d and the
//    output directory is deleted with "rm -rf" via a second system() call.
//  * CreateTargetPhrase(), which built a single phrase prefixed with
//    'TransliterationPhraseDictionary:' and scored 1.3, is replaced by
//    CreateTargetPhrases(): it reads outDir + "/out.txt" line by line, requires
//    exactly two tab-separated fields, uses the first as the target word and
//    the second (parsed with Scan<float>) as the score.
// NOTE(review): mosesDir, externalDir, modelDir and the "en"/"ar" pair are
//   hard-coded to one machine; presumably temporary, confirm they become
//   configuration parameters.
// NOTE(review): tmpnam() only returns a name and creates nothing, so another
//   process can claim that name first; mkstemp()/mkdtemp() avoid the race.
// NOTE(review): the iterator declared inside the loop body is also called
//   `iter` and hides the InputPathList iterator for the rest of that scope.
// NOTE(review): neither inStream nor outStream is tested after opening; if
//   out.txt is missing the while loop never runs and an empty vector is
//   returned without any error. The result of the "rm -rf" call is ignored.
// NOTE(review): ofstream/ifstream are used but only <stdlib.h> is added here;
//   presumably <fstream> arrives through another header, verify.
diff --git a/moses/TranslationModel/TransliterationPhraseDictionary.cpp b/moses/TranslationModel/TransliterationPhraseDictionary.cpp
index bdc5b3ec7..c8de4ffc0 100644
--- a/moses/TranslationModel/TransliterationPhraseDictionary.cpp
+++ b/moses/TranslationModel/TransliterationPhraseDictionary.cpp
@@ -1,4 +1,5 @@
 // vim:tabstop=2
+#include <stdlib.h>
 #include "TransliterationPhraseDictionary.h"
 #include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h"
 
@@ -19,6 +20,13 @@ void TransliterationPhraseDictionary::CleanUpAfterSentenceProcessing(const Input
 
 void TransliterationPhraseDictionary::GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const
 {
+  string mosesDir = "/home/hieu/workspace/github/mosesdecoder";
+  string scriptDir = mosesDir + "/scripts";
+  string externalDir = "/home/hieu/workspace/bin/training-tools";
+  string modelDir = "/home/hieu/workspace/experiment/data/issues/transliteration/Transliteration.3";
+  string inputLang = "en";
+  string outputLang = "ar";
+
   InputPathList::const_iterator iter;
   for (iter = inputPathQueue.begin(); iter != inputPathQueue.end(); ++iter) {
     InputPath &inputPath = **iter;
@@ -40,40 +48,76 @@ void TransliterationPhraseDictionary::GetTargetPhraseCollectionBatch(const Input
       }
 
       // TRANSLITERATE
-      // /home/nadir/mosesdecoder/scripts/Transliteration/prepare-transliteration-phrase-table.pl --transliteration-model-dir /home/nadir/iwslt13-en-ar/model/Transliteration.3 --moses-src-dir /home/nadir/mosesdecoder --external-bin-dir /home/pkoehn/statmt/bin --input-extension en --output-extension ar --oov-file /fs/syn4/nadir/iwslt13-en-ar/evaluation/temp.oov --out-dir /home/nadir/iwslt13-en-ar/model/Transliteration-Phrase-Table.3
-      TargetPhrase *tp = CreateTargetPhrase(sourcePhrase);
+      char *ptr = tmpnam(NULL);
+      string inFile(ptr);
+      ptr = tmpnam(NULL);
+      string outDir(ptr);
+
+      ofstream inStream(inFile.c_str());
+      inStream << sourcePhrase.ToString() << endl;
+      inStream.close();
+
+      string cmd = scriptDir + "/Transliteration/prepare-transliteration-phrase-table.pl" +
+                   " --transliteration-model-dir " + modelDir +
+                   " --moses-src-dir " + mosesDir +
+                   " --external-bin-dir " + externalDir +
+                   " --input-extension " + inputLang +
+                   " --output-extension " + outputLang +
+                   " --oov-file " + inFile +
+                   " --out-dir " + outDir;
+
+      int ret = system(cmd.c_str());
+      UTIL_THROW_IF2(ret != 0, "Transliteration script error");
+
       TargetPhraseCollection *tpColl = new TargetPhraseCollection();
-      tpColl->Add(tp);
+      vector<TargetPhrase*> targetPhrases = CreateTargetPhrases(sourcePhrase, outDir);
+      vector<TargetPhrase*>::const_iterator iter;
+      for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
+        TargetPhrase *tp = *iter;
+        tpColl->Add(tp);
+      }
 
       m_allTPColl.push_back(tpColl);
       inputPath.SetTargetPhrases(*this, tpColl, NULL);
 
+      remove(inFile.c_str());
+
+      cmd = "rm -rf " + outDir;
+      system(cmd.c_str());
     }
   }
 }
 
 
-TargetPhrase *TransliterationPhraseDictionary::CreateTargetPhrase(const Phrase &sourcePhrase) const
+std::vector<TargetPhrase*> TransliterationPhraseDictionary::CreateTargetPhrases(const Phrase &sourcePhrase, const string &outDir) const
 {
-  // create a target phrase from the 1st word of the source, prefix with 'TransliterationPhraseDictionary:'
-  assert(sourcePhrase.GetSize());
-  assert(m_output.size() == 1);
+  std::vector<TargetPhrase*> ret;
+
+  string outPath = outDir + "/out.txt";
+  ifstream outStream(outPath.c_str());
+
+  string line;
+  while (getline(outStream, line)) {
+    vector<string> toks;
+    Tokenize(toks, line, "\t");
+    UTIL_THROW_IF2(toks.size() != 2, "Error in transliteration output file. Expecting word\tscore");
+
+    TargetPhrase *tp = new TargetPhrase();
+    Word &word = tp->AddWord();
+    word.CreateFromString(Output, m_output, toks[0], false);
 
-  string str = sourcePhrase.GetWord(0).GetFactor(0)->GetString().as_string();
-  str = "TransliterationPhraseDictionary:" + str;
+    float score = Scan<float>(toks[1]);
+    tp->GetScoreBreakdown().PlusEquals(this, score);
 
-  TargetPhrase *tp = new TargetPhrase();
-  Word &word = tp->AddWord();
-  word.CreateFromString(Output, m_output, str, false);
+    // score of all other ff when this rule is being loaded
+    tp->Evaluate(sourcePhrase, GetFeaturesToApply());
 
-  // score for this phrase table
-  vector<float> scores(m_numScoreComponents, 1.3);
-  tp->GetScoreBreakdown().PlusEquals(this, scores);
+    ret.push_back(tp);
+  }
 
-  // score of all other ff when this rule is being loaded
-  tp->Evaluate(sourcePhrase, GetFeaturesToApply());
+  outStream.close();
 
-  return tp;
+  return ret;
 }
 
 ChartRuleLookupManager* TransliterationPhraseDictionary::CreateRuleLookupManager(const ChartParser &parser,
// TransliterationPhraseDictionary.h
//  * The protected declaration is renamed to CreateTargetPhrases() and now
//    takes the output directory and returns std::vector<TargetPhrase*>, in
//    step with the definition changed in the .cpp hunk.
diff --git a/moses/TranslationModel/TransliterationPhraseDictionary.h b/moses/TranslationModel/TransliterationPhraseDictionary.h
index a5b29c8d8..be027eae0 100644
--- a/moses/TranslationModel/TransliterationPhraseDictionary.h
+++ b/moses/TranslationModel/TransliterationPhraseDictionary.h
@@ -30,7 +30,7 @@ public:
 protected:
   mutable std::list<TargetPhraseCollection*> m_allTPColl;
 
-  TargetPhrase *CreateTargetPhrase(const Phrase &sourcePhrase) const;
+  std::vector<TargetPhrase*> CreateTargetPhrases(const Phrase &sourcePhrase, const std::string &outDir) const;
 };
 
 } // namespace Moses
// prepare-transliteration-phrase-table.pl
//  * form_corpus now opens "$EVAL_DIR/out.txt" for UTF-8 writing and prints
//    each "$thisStr\t$prob" pair there instead of to standard output; the
//    spaces that used to surround the tab are dropped.
// NOTE(review): the C++ side reads outDir + "/out.txt", so $EVAL_DIR is
//   presumably the --out-dir argument; the option parsing is not part of this
//   diff, confirm.
diff --git a/scripts/Transliteration/prepare-transliteration-phrase-table.pl b/scripts/Transliteration/prepare-transliteration-phrase-table.pl
index a1ca939bf..a96964ac9 100755
--- a/scripts/Transliteration/prepare-transliteration-phrase-table.pl
+++ b/scripts/Transliteration/prepare-transliteration-phrase-table.pl
@@ -129,9 +129,10 @@ sub form_corpus
 
   my $UNK_FILE_NAME = basename($OOV_FILE);
   my $target = $EVAL_DIR . "/$UNK_FILE_NAME/training/corpus.$OUTPUT_EXTENSION";
-
+  my $outFile = "$EVAL_DIR/out.txt";
 
   open MYFILE, "<:encoding(UTF-8)", $testFile or die "Can't open $testFile: $!\n";
+  open OUTFILE, ">:encoding(UTF-8)", $outFile or die "Can't open $outFile: $!\n";
 
   while (<MYFILE>)
 
@@ -162,8 +163,9 @@ sub form_corpus
 
       $i++;
       $prob = $words[$i];
-      print "$thisStr \t $prob\n";
+      print OUTFILE "$thisStr\t$prob\n";
     }
 
   close (MYFILE);
+  close (OUTFILE);
 }
|