// $Id$ /*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2006 University of Edinburgh This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ #include "DecodeStepGeneration.h" #include "GenerationDictionary.h" #include "TranslationOption.h" #include "TranslationOptionCollection.h" #include "PartialTranslOptColl.h" #include "FactorCollection.h" namespace Moses { using namespace std; DecodeStepGeneration::DecodeStepGeneration(const GenerationDictionary* dict, const DecodeStep* prev) : DecodeStep(dict, prev) { } TranslationOption *DecodeStepGeneration::MergeGeneration(const TranslationOption& oldTO, Phrase &mergePhrase , const ScoreComponentCollection& generationScore) const { if (IsFilteringStep()) { if (!oldTO.IsCompatible(mergePhrase, m_conflictFactors)) return NULL; } TranslationOption *newTransOpt = new TranslationOption(oldTO); newTransOpt->MergeNewFeatures(mergePhrase, generationScore, m_newOutputFactors); return newTransOpt; } // helpers typedef pair WordPair; typedef list< WordPair > WordList; // 1st = word // 2nd = score typedef list< WordPair >::const_iterator WordListIterator; /** used in generation: increases iterators when looping through the exponential number of generation expansions */ inline void IncrementIterators(vector< WordListIterator > &wordListIterVector , const vector< WordList > &wordListVector) { for (size_t currPos = 0 ; currPos < wordListVector.size() ; currPos++) { WordListIterator &iter = wordListIterVector[currPos]; iter++; if (iter != wordListVector[currPos].end()) { // eg. 4 -> 5 return; } else { // eg 9 -> 10 iter = wordListVector[currPos].begin(); } } } void DecodeStepGeneration::Process(const TranslationSystem* system , const TranslationOption &inputPartialTranslOpt , const DecodeStep &decodeStep , PartialTranslOptColl &outputPartialTranslOptColl , TranslationOptionCollection * /* toc */ , bool /*adhereTableLimit*/) const { if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0) { // word deletion TranslationOption *newTransOpt = new TranslationOption(inputPartialTranslOpt); outputPartialTranslOptColl.Add(system, newTransOpt); return; } // normal generation step const GenerationDictionary* generationDictionary = decodeStep.GetGenerationDictionaryFeature(); // const WordsRange &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange(); const Phrase &targetPhrase = inputPartialTranslOpt.GetTargetPhrase(); size_t targetLength = targetPhrase.GetSize(); // generation list for each word in phrase vector< WordList > wordListVector(targetLength); // create generation list int wordListVectorPos = 0; for (size_t currPos = 0 ; currPos < targetLength ; currPos++) { // going thorugh all words // generatable factors for this word to be put in wordList WordList &wordList = wordListVector[wordListVectorPos]; const Word &word = targetPhrase.GetWord(currPos); // consult dictionary for possible generations for this word const OutputWordCollection *wordColl = generationDictionary->FindWord(word); if (wordColl == NULL) { // word not found in generation dictionary //toc->ProcessUnknownWord(sourceWordsRange.GetStartPos(), factorCollection); return; // can't be part of a phrase, special handling } else { // sort(*wordColl, CompareWordCollScore); OutputWordCollection::const_iterator iterWordColl; for (iterWordColl = wordColl->begin() ; iterWordColl != wordColl->end(); ++iterWordColl) { const Word &outputWord = (*iterWordColl).first; const ScoreComponentCollection& score = (*iterWordColl).second; // enter into word list generated factor(s) and its(their) score(s) wordList.push_back(WordPair(outputWord, score)); } wordListVectorPos++; // done, next word } } // use generation list (wordList) // set up iterators (total number of expansions) size_t numIteration = 1; vector< WordListIterator > wordListIterVector(targetLength); vector< const Word* > mergeWords(targetLength); for (size_t currPos = 0 ; currPos < targetLength ; currPos++) { wordListIterVector[currPos] = wordListVector[currPos].begin(); numIteration *= wordListVector[currPos].size(); } // go thru each possible factor for each word & create hypothesis for (size_t currIter = 0 ; currIter < numIteration ; currIter++) { ScoreComponentCollection generationScore; // total score for this string of words // create vector of words with new factors for last phrase for (size_t currPos = 0 ; currPos < targetLength ; currPos++) { const WordPair &wordPair = *wordListIterVector[currPos]; mergeWords[currPos] = &(wordPair.first); generationScore.PlusEquals(wordPair.second); } // merge with existing trans opt Phrase genPhrase( mergeWords); TranslationOption *newTransOpt = MergeGeneration(inputPartialTranslOpt, genPhrase, generationScore); if (newTransOpt != NULL) { outputPartialTranslOptColl.Add(system, newTransOpt); } // increment iterators IncrementIterators(wordListIterVector, wordListVector); } } }